checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b453470fc3b7f050334ee9bd0fe33269de819a5d952e30325708e450487c58d0
4
+ data.tar.gz: e10f924ab8691a5ce966f24f2bb24bb313d764480326db50e9206d3d9f9cb894
5
+ SHA512:
6
+ metadata.gz: 18da3e66f1ce1260399843ec9995f081f945e37521537db9afe175511c723a7216d80b507811d3aefb45ad3dbbb2c51bfe291f23a3cf413faeb33f967fd29d6b
7
+ data.tar.gz: f17f454e99988c817c7332c6cc4bb7c548c3e7e2b0c59cb2a57a0802d71668b1b0a500fbb8ac8a28c37b1ba1f5034c5764c6f914549dfc37e6387b878500e6c6
data/CHANGELOG ADDED
@@ -0,0 +1 @@
1
+ v0.0.1 stuff
data/LICENSE ADDED
@@ -0,0 +1,203 @@
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ https://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ https://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
203
+
data/Manifest ADDED
@@ -0,0 +1,35 @@
1
+ CHANGELOG
2
+ LICENSE
3
+ NOTICE
4
+ Manifest
5
+ Rakefile
6
+ avro.gemspec
7
+ interop/test_interop.rb
8
+ lib/avro.rb
9
+ lib/avro/data_file.rb
10
+ lib/avro/io.rb
11
+ lib/avro/ipc.rb
12
+ lib/avro/logical_types.rb
13
+ lib/avro/protocol.rb
14
+ lib/avro/schema.rb
15
+ lib/avro/schema_compatibility.rb
16
+ lib/avro/schema_normalization.rb
17
+ lib/avro/schema_validator.rb
18
+ test/case_finder.rb
19
+ test/random_data.rb
20
+ test/sample_ipc_client.rb
21
+ test/sample_ipc_http_client.rb
22
+ test/sample_ipc_http_server.rb
23
+ test/sample_ipc_server.rb
24
+ test/test_datafile.rb
25
+ test/test_fingerprints.rb
26
+ test/test_help.rb
27
+ test/test_io.rb
28
+ test/test_logical_types.rb
29
+ test/test_protocol.rb
30
+ test/test_schema.rb
31
+ test/test_schema_compatibility.rb
32
+ test/test_schema_normalization.rb
33
+ test/test_schema_validator.rb
34
+ test/test_socket_transport.rb
35
+ test/tool.rb
data/NOTICE ADDED
@@ -0,0 +1,6 @@
1
+ Apache Avro
2
+ Copyright 2010-2015 The Apache Software Foundation
3
+
4
+ This product includes software developed at
5
+ The Apache Software Foundation (https://www.apache.org/).
6
+
data/Rakefile ADDED
@@ -0,0 +1,60 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'rubygems'
18
+ require 'echoe'
19
+ VERSION = File.open('../../share/VERSION.txt').read.sub('-SNAPSHOT', '.pre1').chomp
20
+ Echoe.new('avro', VERSION) do |p|
21
+ p.author = "Apache Software Foundation"
22
+ p.email = "dev@avro.apache.org"
23
+ p.summary = "Apache Avro for Ruby"
24
+ p.description = "Avro is a data serialization and RPC format"
25
+ p.url = "https://avro.apache.org/"
26
+ p.runtime_dependencies = ["multi_json ~>1"]
27
+ p.licenses = ["Apache-2.0"]
28
+ end
29
+
30
+ t = Rake::TestTask.new(:interop)
31
+ t.pattern = 'interop/test*.rb'
32
+
33
+ task :generate_interop do
34
+ $:.unshift(HERE + '/lib')
35
+ $:.unshift(HERE + '/test')
36
+ require 'avro'
37
+ require 'random_data'
38
+
39
+ schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
40
+ r = RandomData.new(schema, ENV['SEED'])
41
+ Avro::DataFile.codecs.each do |name, codec|
42
+ next unless codec
43
+ filename = name == 'null' ? 'ruby.avro' : "ruby_#{name}.avro"
44
+ path = File.join(BUILD, 'interop/data', filename)
45
+ Avro::DataFile.open(path, 'w', schema.to_s, name) do |writer|
46
+ writer << r.next
47
+ end
48
+ end
49
+ end
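+ # Illustrative invocation (added comment): `rake generate_interop`, optionally
+ # seeding the random data with e.g. `rake generate_interop SEED=42`.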
50
+
51
+
52
+ HERE = File.expand_path(File.dirname(__FILE__))
53
+ SHARE = HERE + '/../../share'
54
+ SCHEMAS = SHARE + '/test/schemas'
55
+ BUILD = HERE + '/../../build'
56
+
57
+ task :dist => [:gem] do
58
+ mkdir_p "../../dist/ruby"
59
+ cp "pkg/avro-#{VERSION}.gem", "../../dist/ruby"
60
+ end
data/interop/test_interop.rb ADDED
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'rubygems'
19
+ require 'test/unit'
20
+ require 'avro'
21
+
22
+ CODECS_TO_VALIDATE = ['deflate'] # The 'null' codec is implicitly included
23
+
24
+ class TestInterop < Test::Unit::TestCase
25
+ HERE = File.expand_path(File.dirname(__FILE__))
26
+ SHARE = HERE + '/../../../share'
27
+ SCHEMAS = SHARE + '/test/schemas'
28
+
29
+ files = Dir[HERE + '/../../../build/interop/data/*.avro'].select do |fn|
30
+ sep, codec = File.basename(fn, 'avro').rpartition('_')[1, 2]
31
+ sep.empty? || CODECS_TO_VALIDATE.include?(codec)
32
+ end
33
+
34
+ files.each do |fn|
35
+ define_method("test_read_#{File.basename(fn, 'avro')}") do
36
+ projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))
37
+
38
+ File.open(fn) do |f|
39
+ r = Avro::DataFile::Reader.new(f, Avro::IO::DatumReader.new(projection))
40
+ i = 0
41
+ r.each do |datum|
42
+ i += 1
43
+ assert_not_nil datum, "nil datum from #{fn}"
44
+ end
45
+ assert_not_equal 0, i, "no data read in from #{fn}"
46
+ end
47
+ end
48
+ end
49
+ end
data/lib/avro.rb ADDED
@@ -0,0 +1,60 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'multi_json'
18
+ require 'set'
19
+ require 'digest/md5'
20
+ require 'net/http'
21
+ require 'stringio'
22
+ require 'zlib'
23
+
24
+ module Avro
25
+ VERSION = "0.1.0"
26
+
27
+ class AvroError < StandardError; end
28
+
29
+ class AvroTypeError < Avro::AvroError
30
+ def initialize(schm=nil, datum=nil, msg=nil)
31
+ msg ||= "Not a #{schm}: #{datum}"
32
+ super(msg)
33
+ end
34
+ end
35
+
36
+ class << self
37
+ attr_writer :disable_field_default_validation
38
+ attr_writer :disable_schema_name_validation
39
+
40
+ def disable_field_default_validation
41
+ @disable_field_default_validation ||=
42
+ ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
43
+ end
44
+
45
+ def disable_schema_name_validation
46
+ @disable_schema_name_validation ||=
47
+ ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
48
+ end
49
+
50
+ end
51
+ end
52
+
53
+ require 'avro/schema'
54
+ require 'avro/io'
55
+ require 'avro/data_file'
56
+ require 'avro/protocol'
57
+ require 'avro/ipc'
58
+ require 'avro/schema_normalization'
59
+ require 'avro/schema_validator'
60
+ require 'avro/schema_compatibility'
data/lib/avro/data_file.rb ADDED
@@ -0,0 +1,406 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'openssl'
18
+
19
+ module Avro
20
+ module DataFile
21
+ VERSION = 1
22
+ MAGIC = "Obj" + [VERSION].pack('c')
23
+ MAGIC.force_encoding('BINARY') if MAGIC.respond_to?(:force_encoding)
24
+ MAGIC_SIZE = MAGIC.respond_to?(:bytesize) ? MAGIC.bytesize : MAGIC.size
25
+ SYNC_SIZE = 16
26
+ SYNC_INTERVAL = 4000 * SYNC_SIZE
27
+ META_SCHEMA = Schema.parse('{"type": "map", "values": "bytes"}')
28
+ VALID_ENCODINGS = ['binary'] # not used yet
29
+
30
+ class DataFileError < AvroError; end
31
+
32
+ def self.open(file_path, mode='r', schema=nil, codec=nil)
33
+ schema = Avro::Schema.parse(schema) if schema
34
+ case mode
35
+ when 'w'
36
+ unless schema
37
+ raise DataFileError, "Writing an Avro file requires a schema."
38
+ end
39
+ io = open_writer(File.open(file_path, 'wb'), schema, codec)
40
+ when 'r'
41
+ io = open_reader(File.open(file_path, 'rb'), schema)
42
+ else
43
+ raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
44
+ end
45
+
46
+ yield io if block_given?
47
+ io
48
+ ensure
49
+ io.close if block_given? && io
50
+ end
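+ # Illustrative usage sketch (added comment; the file name and record schema
+ # below are hypothetical):
+ #   schema = '{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}'
+ #   Avro::DataFile.open('users.avro', 'w', schema) { |writer| writer << { 'name' => 'alice' } }
+ #   Avro::DataFile.open('users.avro') { |reader| reader.each { |datum| puts datum.inspect } }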
51
+
52
+ def self.codecs
53
+ @codecs
54
+ end
55
+
56
+ def self.register_codec(codec)
57
+ @codecs ||= {}
58
+ codec = codec.new if !codec.respond_to?(:codec_name) && codec.is_a?(Class)
59
+ @codecs[codec.codec_name.to_s] = codec
60
+ end
61
+
62
+ def self.get_codec(codec)
63
+ codec ||= 'null'
64
+ if codec.respond_to?(:compress) && codec.respond_to?(:decompress)
65
+ codec # it's a codec instance
66
+ elsif codec.is_a?(Class)
67
+ codec.new # it's a codec class
68
+ elsif @codecs.include?(codec.to_s)
69
+ @codecs[codec.to_s] # it's a string or symbol (codec name)
70
+ else
71
+ raise DataFileError, "Unknown codec: #{codec.inspect}"
72
+ end
73
+ end
74
+
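+ # A codec can be any object that responds to codec_name, compress and
+ # decompress; a minimal sketch of registering one (added comment, the
+ # IdentityCodec class is hypothetical):
+ #   class IdentityCodec
+ #     def codec_name; 'identity'; end
+ #     def compress(data); data; end
+ #     def decompress(data); data; end
+ #   end
+ #   Avro::DataFile.register_codec(IdentityCodec)
+ #   Avro::DataFile.get_codec('identity') # => an IdentityCodec instance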
75
+ class << self
76
+ private
77
+ def open_writer(file, schema, codec=nil)
78
+ writer = Avro::IO::DatumWriter.new(schema)
79
+ Avro::DataFile::Writer.new(file, writer, schema, codec)
80
+ end
81
+
82
+ def open_reader(file, schema)
83
+ reader = Avro::IO::DatumReader.new(nil, schema)
84
+ Avro::DataFile::Reader.new(file, reader)
85
+ end
86
+ end
87
+
88
+ class Writer
89
+ def self.generate_sync_marker
90
+ OpenSSL::Random.random_bytes(16)
91
+ end
92
+
93
+ attr_reader :writer, :encoder, :datum_writer, :buffer_writer, :buffer_encoder, :sync_marker, :meta, :codec
94
+ attr_accessor :block_count
95
+
96
+ def initialize(writer, datum_writer, writers_schema=nil, codec=nil, meta={})
97
+ # If writers_schema is not present, presume we're appending
98
+ @writer = writer
99
+ @encoder = IO::BinaryEncoder.new(@writer)
100
+ @datum_writer = datum_writer
101
+ @meta = meta
102
+ @buffer_writer = StringIO.new('', 'w')
103
+ @buffer_writer.set_encoding('BINARY') if @buffer_writer.respond_to?(:set_encoding)
104
+ @buffer_encoder = IO::BinaryEncoder.new(@buffer_writer)
105
+ @block_count = 0
106
+
107
+ if writers_schema
108
+ @sync_marker = Writer.generate_sync_marker
109
+ @codec = DataFile.get_codec(codec)
110
+ @meta['avro.codec'] = @codec.codec_name.to_s
111
+ @meta['avro.schema'] = writers_schema.to_s
112
+ datum_writer.writers_schema = writers_schema
113
+ write_header
114
+ else
115
+ # open writer for reading to collect metadata
116
+ dfr = Reader.new(writer, Avro::IO::DatumReader.new)
117
+
118
+ # FIXME(jmhodges): collect arbitrary metadata
119
+ # collect metadata
120
+ @sync_marker = dfr.sync_marker
121
+ @meta['avro.codec'] = dfr.meta['avro.codec']
122
+ @codec = DataFile.get_codec(meta['avro.codec'])
123
+
124
+ # get schema used to write existing file
125
+ schema_from_file = dfr.meta['avro.schema']
126
+ @meta['avro.schema'] = schema_from_file
127
+ datum_writer.writers_schema = Schema.parse(schema_from_file)
128
+
129
+ # seek to the end of the file and prepare for writing
130
+ writer.seek(0,2)
131
+ end
132
+ end
133
+
134
+ # Append a datum to the file
135
+ def <<(datum)
136
+ datum_writer.write(datum, buffer_encoder)
137
+ self.block_count += 1
138
+
139
+ # if the data to write is larger than the sync interval, write
140
+ # the block
141
+ if buffer_writer.tell >= SYNC_INTERVAL
142
+ write_block
143
+ end
144
+ end
145
+
146
+ # Return the current position as a value that may be passed to
147
+ # DataFileReader.seek(long). Forces the end of the current block,
148
+ # emitting a synchronization marker.
149
+ def sync
150
+ write_block
151
+ writer.tell
152
+ end
153
+
154
+ # Flush the current state of the file, including metadata
155
+ def flush
156
+ write_block
157
+ writer.flush
158
+ end
159
+
160
+ def close
161
+ flush
162
+ writer.close
163
+ end
164
+
165
+ private
166
+
167
+ def write_header
168
+ # write magic
169
+ writer.write(MAGIC)
170
+
171
+ # write metadata
172
+ datum_writer.write_data(META_SCHEMA, meta, encoder)
173
+
174
+ # write sync marker
175
+ writer.write(sync_marker)
176
+ end
177
+
178
+ # TODO(jmhodges): make a schema for blocks and use datum_writer
179
+ # TODO(jmhodges): do we really need the number of items in the block?
180
+ def write_block
181
+ if block_count > 0
182
+ # write number of items in block and block size in bytes
183
+ encoder.write_long(block_count)
184
+ to_write = codec.compress(buffer_writer.string)
185
+ encoder.write_long(to_write.respond_to?(:bytesize) ? to_write.bytesize : to_write.size)
186
+
187
+ # write block contents
188
+ writer.write(to_write)
189
+
190
+ # write sync marker
191
+ writer.write(sync_marker)
192
+
193
+ # reset buffer
194
+ buffer_writer.truncate(0)
195
+ buffer_writer.rewind
196
+ self.block_count = 0
197
+ end
198
+ end
199
+ end
200
+
201
+ # Read files written by DataFileWriter
202
+ class Reader
203
+ include ::Enumerable
204
+
205
+ # The reader and binary decoder for the raw file stream
206
+ attr_reader :reader, :decoder
207
+
208
+ # The binary decoder for the contents of a block (after codec decompression)
209
+ attr_reader :block_decoder
210
+
211
+ attr_reader :datum_reader, :sync_marker, :meta, :file_length, :codec
212
+ attr_accessor :block_count # records remaining in current block
213
+
214
+ def initialize(reader, datum_reader)
215
+ @reader = reader
216
+ @decoder = IO::BinaryDecoder.new(reader)
217
+ @datum_reader = datum_reader
218
+
219
+ # read the header: magic, meta, sync
220
+ read_header
221
+
222
+ @codec = DataFile.get_codec(meta['avro.codec'])
223
+
224
+ # get ready to read
225
+ @block_count = 0
226
+ datum_reader.writers_schema = Schema.parse meta['avro.schema']
227
+ end
228
+
229
+ # Iterates through each datum in this file
230
+ # TODO(jmhodges): handle block of length zero
231
+ def each
232
+ loop do
233
+ if block_count == 0
234
+ case
235
+ when eof?; break
236
+ when skip_sync
237
+ break if eof?
238
+ read_block_header
239
+ else
240
+ read_block_header
241
+ end
242
+ end
243
+
244
+ datum = datum_reader.read(block_decoder)
245
+ self.block_count -= 1
246
+ yield(datum)
247
+ end
248
+ end
249
+
250
+ def eof?; reader.eof?; end
251
+
252
+ def close
253
+ reader.close
254
+ end
255
+
256
+ private
257
+ def read_header
258
+ # seek to the beginning of the file to get magic block
259
+ reader.seek(0, 0)
260
+
261
+ # check magic number
262
+ magic_in_file = reader.read(MAGIC_SIZE)
263
+ if magic_in_file.size < MAGIC_SIZE
264
+ msg = 'Not an Avro data file: shorter than the Avro magic block'
265
+ raise DataFileError, msg
266
+ elsif magic_in_file != MAGIC
267
+ msg = "Not an Avro data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
268
+ raise DataFileError, msg
269
+ end
270
+
271
+ # read metadata
272
+ @meta = datum_reader.read_data(META_SCHEMA,
273
+ META_SCHEMA,
274
+ decoder)
275
+ # read sync marker
276
+ @sync_marker = reader.read(SYNC_SIZE)
277
+ end
278
+
279
+ def read_block_header
280
+ self.block_count = decoder.read_long
281
+ block_bytes = decoder.read_long
282
+ data = codec.decompress(reader.read(block_bytes))
283
+ @block_decoder = IO::BinaryDecoder.new(StringIO.new(data))
284
+ end
285
+
286
+ # read SYNC_SIZE bytes; if they match the sync
287
+ # marker, return true. Otherwise, seek back to where we started
288
+ # and return false
289
+ def skip_sync
290
+ proposed_sync_marker = reader.read(SYNC_SIZE)
291
+ if proposed_sync_marker != sync_marker
292
+ reader.seek(-SYNC_SIZE, 1)
293
+ false
294
+ else
295
+ true
296
+ end
297
+ end
298
+ end
299
+
300
+
301
+ class NullCodec
302
+ def codec_name; 'null'; end
303
+ def decompress(data); data; end
304
+ def compress(data); data; end
305
+ end
306
+
307
+ class DeflateCodec
308
+ attr_reader :level
309
+
310
+ def initialize(level=Zlib::DEFAULT_COMPRESSION)
311
+ @level = level
312
+ end
313
+
314
+ def codec_name; 'deflate'; end
315
+
316
+ def decompress(compressed)
317
+ # Passing a negative number to Inflate puts it into "raw" RFC1951 mode
318
+ # (without the RFC1950 header & checksum). See the docs for
319
+ # inflateInit2 in https://www.zlib.net/manual.html
320
+ zstream = Zlib::Inflate.new(-Zlib::MAX_WBITS)
321
+ data = zstream.inflate(compressed)
322
+ data << zstream.finish
323
+ ensure
324
+ zstream.close
325
+ end
326
+
327
+ def compress(data)
328
+ zstream = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
329
+ compressed = zstream.deflate(data)
330
+ compressed << zstream.finish
331
+ ensure
332
+ zstream.close
333
+ end
334
+ end
335
+
336
+ class SnappyCodec
337
+ def codec_name; 'snappy'; end
338
+
339
+ def decompress(data)
340
+ load_snappy!
341
+ crc32 = data.slice(-4..-1).unpack('N').first
342
+ uncompressed = Snappy.inflate(data.slice(0..-5))
343
+
344
+ if crc32 == Zlib.crc32(uncompressed)
345
+ uncompressed
346
+ else
347
+ # older versions of avro-ruby didn't write the checksum, so if it
348
+ # doesn't match we must assume it wasn't there and return
349
+ # the entire payload uncompressed.
350
+ Snappy.inflate(data)
351
+ end
352
+ rescue Snappy::Error
353
+ # older versions of avro-ruby didn't write the checksum, so removing
354
+ # the last 4 bytes may cause Snappy to fail. Recover by assuming the
356
+ # payload is from an older file and decompressing the entire buffer.
356
+ Snappy.inflate(data)
357
+ end
358
+
359
+ def compress(data)
360
+ load_snappy!
361
+ crc32 = Zlib.crc32(data)
362
+ compressed = Snappy.deflate(data)
363
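+ # the frame is the Snappy-compressed payload followed by the big-endian
+ # CRC-32 of the uncompressed data, which decompress above reads back from
+ # the last four bytes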
+ [compressed, crc32].pack('a*N')
364
+ end
365
+
366
+ private
367
+
368
+ def load_snappy!
369
+ require 'snappy' unless defined?(Snappy)
370
+ rescue LoadError
371
+ raise LoadError, "Snappy compression is not available, please install the `snappy` gem."
372
+ end
373
+ end
374
+
375
+ class ZstandardCodec
376
+ def codec_name; 'zstandard'; end
377
+
378
+ def decompress(data)
379
+ load_zstandard!
380
+ Zstd.decompress(data)
381
+ end
382
+
383
+ def compress(data)
384
+ load_zstandard!
385
+ Zstd.compress(data)
386
+ end
387
+
388
+ private
389
+
390
+ def load_zstandard!
391
+ require 'zstd-ruby' unless defined?(Zstd)
392
+ rescue LoadError
393
+ raise LoadError, "Zstandard compression is not available, please install the `zstd-ruby` gem."
394
+ end
395
+ end
396
+
397
+ DataFile.register_codec NullCodec
398
+ DataFile.register_codec DeflateCodec
399
+ DataFile.register_codec SnappyCodec
400
+ DataFile.register_codec ZstandardCodec
401
+
402
+ # TODO this constant won't be updated if you register another codec.
403
+ # Deprecated in favor of Avro::DataFile::codecs
404
+ VALID_CODECS = DataFile.codecs.keys
405
+ end
406
+ end
data/lib/avro/io.rb ADDED
@@ -0,0 +1,599 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ module IO
19
+ # Raised when datum is not an example of schema
20
+ class AvroTypeError < AvroError
21
+ def initialize(expected_schema, datum)
22
+ super("The datum #{datum.inspect} is not an example of schema #{expected_schema}")
23
+ end
24
+ end
25
+
26
+ # Raised when writer's and reader's schema do not match
27
+ class SchemaMatchException < AvroError
28
+ def initialize(writers_schema, readers_schema)
29
+ super("Writer's schema #{writers_schema} and Reader's schema " +
30
+ "#{readers_schema} do not match.")
31
+ end
32
+ end
33
+
34
+ # FIXME(jmhodges) move validate to this module?
35
+
36
+ class BinaryDecoder
37
+ # Read leaf values
38
+
39
+ # reader is an object on which we can call read, seek and tell.
40
+ attr_reader :reader
41
+ def initialize(reader)
42
+ @reader = reader
43
+ end
44
+
45
+ def byte!
46
+ @reader.readbyte
47
+ end
48
+
49
+ def read_null
50
+ # null is written as zero bytes
51
+ nil
52
+ end
53
+
54
+ def read_boolean
55
+ byte! == 1
56
+ end
57
+
58
+ def read_int; read_long; end
59
+
60
+ def read_long
61
+ # int and long values are written using variable-length,
62
+ # zig-zag coding.
63
+ b = byte!
64
+ n = b & 0x7F
65
+ shift = 7
66
+ while (b & 0x80) != 0
67
+ b = byte!
68
+ n |= (b & 0x7F) << shift
69
+ shift += 7
70
+ end
71
+ (n >> 1) ^ -(n & 1)
72
+ end
73
+
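+ # Added illustration (not in the original source): zig-zag maps signed to
+ # unsigned values before the varint, so 0=>0, -1=>1, 1=>2, -2=>3, e.g.
+ #   BinaryDecoder.new(StringIO.new("\x04")).read_long  # => 2
+ #   BinaryDecoder.new(StringIO.new("\x03")).read_long  # => -2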
74
+ def read_float
75
+ # A float is written as 4 bytes.
76
+ # The float is converted into a 32-bit integer using a method
77
+ # equivalent to Java's floatToIntBits and then encoded in
78
+ # little-endian format.
79
+ read_and_unpack(4, 'e'.freeze)
80
+ end
81
+
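+ # Illustrative ways to opt out of the checks above (added comment):
+ #   Avro.disable_schema_name_validation = true
+ #   ENV['AVRO_DISABLE_FIELD_DEFAULT_VALIDATION'] = '1'  # any non-empty value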
82
+ def read_double
83
+ # A double is written as 8 bytes.
84
+ # The double is converted into a 64-bit integer using a method
85
+ # equivalent to Java's doubleToLongBits and then encoded in
86
+ # little-endian format.
87
+ read_and_unpack(8, 'E'.freeze)
88
+ end
89
+
90
+ def read_bytes
91
+ # Bytes are encoded as a long followed by that many bytes of
92
+ # data.
93
+ read(read_long)
94
+ end
95
+
96
+ def read_string
97
+ # A string is encoded as a long followed by that many bytes of
98
+ # UTF-8 encoded character data.
99
+ read_bytes.tap do |string|
100
+ string.force_encoding('UTF-8'.freeze) if string.respond_to? :force_encoding
101
+ end
102
+ end
103
+
104
+ def read(len)
105
+ # Read n bytes
106
+ @reader.read(len)
107
+ end
108
+
109
+ def skip_null
110
+ nil
111
+ end
112
+
113
+ def skip_boolean
114
+ skip(1)
115
+ end
116
+
117
+ def skip_int
118
+ skip_long
119
+ end
120
+
121
+ def skip_long
122
+ b = byte!
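+ # Layout written/read by this module (summary of write_header/write_block):
+ # the 4-byte MAGIC ("Obj" plus the format version), a metadata map encoded
+ # with META_SCHEMA (holding 'avro.schema' and 'avro.codec'), the 16-byte
+ # sync marker, then repeated data blocks of the form
+ #   <record count (long)> <compressed size (long)> <compressed data> <sync marker>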
123
+ while (b & 0x80) != 0
124
+ b = byte!
125
+ end
126
+ end
127
+
128
+ def skip_float
129
+ skip(4)
130
+ end
131
+
132
+ def skip_double
133
+ skip(8)
134
+ end
135
+
136
+ def skip_bytes
137
+ skip(read_long)
138
+ end
139
+
140
+ def skip_string
141
+ skip_bytes
142
+ end
143
+
144
+ def skip(n)
145
+ reader.seek(reader.tell() + n)
146
+ end
147
+
148
+ private
149
+
150
+ # Optimize unpacking strings when `unpack1` is available (ruby >= 2.4)
151
+ if String.instance_methods.include?(:unpack1)
152
+
153
+ def read_and_unpack(byte_count, format)
154
+ @reader.read(byte_count).unpack1(format)
155
+ end
156
+
157
+ else
158
+
159
+ def read_and_unpack(byte_count, format)
160
+ @reader.read(byte_count).unpack(format)[0]
161
+ end
162
+
163
+ end
164
+ end
165
+
166
+ # Write leaf values
167
+ class BinaryEncoder
168
+ attr_reader :writer
169
+
170
+ def initialize(writer)
171
+ @writer = writer
172
+ end
173
+
174
+ # null is written as zero bytes
175
+ def write_null(_datum)
176
+ nil
177
+ end
178
+
179
+ # a boolean is written as a single byte
180
+ # whose value is either 0 (false) or 1 (true).
181
+ def write_boolean(datum)
182
+ on_disk = datum ? 1.chr : 0.chr
183
+ writer.write(on_disk)
184
+ end
185
+
186
+ # int and long values are written using variable-length,
187
+ # zig-zag coding.
188
+ def write_int(n)
189
+ write_long(n)
190
+ end
191
+
192
+ # int and long values are written using variable-length,
193
+ # zig-zag coding.
194
+ def write_long(n)
195
+ n = (n << 1) ^ (n >> 63)
196
+ while (n & ~0x7F) != 0
197
+ @writer.write(((n & 0x7f) | 0x80).chr)
198
+ n >>= 7
199
+ end
200
+ @writer.write(n.chr)
201
+ end
202
+
203
+ # A float is written as 4 bytes.
204
+ # The float is converted into a 32-bit integer using a method
205
+ # equivalent to Java's floatToIntBits and then encoded in
206
+ # little-endian format.
207
+ def write_float(datum)
208
+ @writer.write([datum].pack('e'.freeze))
209
+ end
210
+
211
+ # A double is written as 8 bytes.
212
+ # The double is converted into a 64-bit integer using a method
213
+ # equivalent to Java's doubleToLongBits and then encoded in
214
+ # little-endian format.
215
+ def write_double(datum)
216
+ @writer.write([datum].pack('E'.freeze))
217
+ end
218
+
219
+ # Bytes are encoded as a long followed by that many bytes of data.
220
+ def write_bytes(datum)
221
+ write_long(datum.bytesize)
222
+ @writer.write(datum)
223
+ end
224
+
225
+ # A string is encoded as a long followed by that many bytes of
226
+ # UTF-8 encoded character data
227
+ def write_string(datum)
228
+ datum = datum.encode('utf-8'.freeze) if datum.respond_to? :encode
229
+ write_bytes(datum)
230
+ end
231
+
232
+ # Write an arbitrary datum.
233
+ def write(datum)
234
+ writer.write(datum)
235
+ end
236
+ end
237
+
238
+ class DatumReader
239
+ def self.match_schemas(writers_schema, readers_schema)
240
+ Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)
241
+ end
242
+
243
+ attr_accessor :writers_schema, :readers_schema
244
+
245
+ def initialize(writers_schema=nil, readers_schema=nil)
246
+ @writers_schema = writers_schema
247
+ @readers_schema = readers_schema
248
+ end
249
+
250
+ def read(decoder)
251
+ self.readers_schema = writers_schema unless readers_schema
252
+ read_data(writers_schema, readers_schema, decoder)
253
+ end
254
+
255
+ def read_data(writers_schema, readers_schema, decoder)
256
+ # schema matching
257
+ unless self.class.match_schemas(writers_schema, readers_schema)
258
+ raise SchemaMatchException.new(writers_schema, readers_schema)
259
+ end
260
+
261
+ # schema resolution: reader's schema is a union, writer's
262
+ # schema is not
263
+ if writers_schema.type_sym != :union && readers_schema.type_sym == :union
264
+ rs = readers_schema.schemas.find{|s|
265
+ self.class.match_schemas(writers_schema, s)
266
+ }
267
+ return read_data(writers_schema, rs, decoder) if rs
268
+ raise SchemaMatchException.new(writers_schema, readers_schema)
269
+ end
270
+
271
+ # function dispatch for reading data based on type of writer's
272
+ # schema
273
+ datum = case writers_schema.type_sym
274
+ when :null; decoder.read_null
275
+ when :boolean; decoder.read_boolean
276
+ when :string; decoder.read_string
277
+ when :int; decoder.read_int
278
+ when :long; decoder.read_long
279
+ when :float; decoder.read_float
280
+ when :double; decoder.read_double
281
+ when :bytes; decoder.read_bytes
282
+ when :fixed; read_fixed(writers_schema, readers_schema, decoder)
283
+ when :enum; read_enum(writers_schema, readers_schema, decoder)
284
+ when :array; read_array(writers_schema, readers_schema, decoder)
285
+ when :map; read_map(writers_schema, readers_schema, decoder)
286
+ when :union; read_union(writers_schema, readers_schema, decoder)
287
+ when :record, :error, :request; read_record(writers_schema, readers_schema, decoder)
288
+ else
289
+ raise AvroError, "Cannot read unknown schema type: #{writers_schema.type}"
290
+ end
291
+
292
+ readers_schema.type_adapter.decode(datum)
293
+ end
294
+
295
+ def read_fixed(writers_schema, _readers_schema, decoder)
296
+ decoder.read(writers_schema.size)
297
+ end
298
+
299
+ def read_enum(writers_schema, readers_schema, decoder)
300
+ index_of_symbol = decoder.read_int
301
+ read_symbol = writers_schema.symbols[index_of_symbol]
302
+
303
+ # TODO(jmhodges): figure out what unset means for resolution
304
+ # schema resolution
305
+ unless readers_schema.symbols.include?(read_symbol)
306
+ # 'unset' here
307
+ end
308
+
309
+ read_symbol
310
+ end
311
+
312
+ def read_array(writers_schema, readers_schema, decoder)
313
+ read_items = []
314
+ block_count = decoder.read_long
315
+ while block_count != 0
316
+ if block_count < 0
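+ # a negative count is followed by the block's size in bytes, which allows
+ # readers to skip the whole block without decoding it (see skip_blocks)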
317
+ block_count = -block_count
318
+ _block_size = decoder.read_long
319
+ end
320
+ block_count.times do
321
+ read_items << read_data(writers_schema.items,
322
+ readers_schema.items,
323
+ decoder)
324
+ end
325
+ block_count = decoder.read_long
326
+ end
327
+
328
+ read_items
329
+ end
330
+
331
+ def read_map(writers_schema, readers_schema, decoder)
332
+ read_items = {}
333
+ block_count = decoder.read_long
334
+ while block_count != 0
335
+ if block_count < 0
336
+ block_count = -block_count
337
+ _block_size = decoder.read_long
338
+ end
339
+ block_count.times do
340
+ key = decoder.read_string
341
+ read_items[key] = read_data(writers_schema.values,
342
+ readers_schema.values,
343
+ decoder)
344
+ end
345
+ block_count = decoder.read_long
346
+ end
347
+
348
+ read_items
349
+ end
350
+
351
+ def read_union(writers_schema, readers_schema, decoder)
352
+ index_of_schema = decoder.read_long
353
+ selected_writers_schema = writers_schema.schemas[index_of_schema]
354
+
355
+ read_data(selected_writers_schema, readers_schema, decoder)
356
+ end
357
+
358
+ def read_record(writers_schema, readers_schema, decoder)
359
+ readers_fields_hash = readers_schema.fields_hash
360
+ read_record = {}
361
+ writers_schema.fields.each do |field|
362
+ if (readers_field = readers_fields_hash[field.name])
363
+ field_val = read_data(field.type, readers_field.type, decoder)
364
+ read_record[field.name] = field_val
365
+ else
366
+ skip_data(field.type, decoder)
367
+ end
368
+ end
369
+
370
+ # fill in the default values
371
+ if readers_fields_hash.size > read_record.size
372
+ writers_fields_hash = writers_schema.fields_hash
373
+ readers_fields_hash.each do |field_name, field|
374
+ unless writers_fields_hash.has_key? field_name
375
+ if field.default?
376
+ field_val = read_default_value(field.type, field.default)
377
+ read_record[field.name] = field_val
378
+ else
379
+ raise AvroError, "Missing data for #{field.type} with no default"
380
+ end
381
+ end
382
+ end
383
+ end
384
+
385
+ read_record
386
+ end
387
+
388
+ def read_default_value(field_schema, default_value)
389
+ # Basically a JSON Decoder?
390
+ case field_schema.type_sym
391
+ when :null
392
+ return nil
393
+ when :boolean
394
+ return default_value
395
+ when :int, :long
396
+ return Integer(default_value)
397
+ when :float, :double
398
+ return Float(default_value)
399
+ when :enum, :fixed, :string, :bytes
400
+ return default_value
401
+ when :array
402
+ read_array = []
403
+ default_value.each do |json_val|
404
+ item_val = read_default_value(field_schema.items, json_val)
405
+ read_array << item_val
406
+ end
407
+ return read_array
408
+ when :map
409
+ read_map = {}
410
+ default_value.each do |key, json_val|
411
+ map_val = read_default_value(field_schema.values, json_val)
412
+ read_map[key] = map_val
413
+ end
414
+ return read_map
415
+ when :union
416
+ return read_default_value(field_schema.schemas[0], default_value)
417
+ when :record, :error
418
+ read_record = {}
419
+ field_schema.fields.each do |field|
420
+ json_val = default_value[field.name]
421
+ json_val = field.default unless json_val
422
+ field_val = read_default_value(field.type, json_val)
423
+ read_record[field.name] = field_val
424
+ end
425
+ return read_record
426
+ else
427
+ fail_msg = "Unknown type: #{field_schema.type}"
428
+ raise AvroError, fail_msg
429
+ end
430
+ end
431
+
432
+ def skip_data(writers_schema, decoder)
433
+ case writers_schema.type_sym
434
+ when :null
435
+ decoder.skip_null
436
+ when :boolean
437
+ decoder.skip_boolean
438
+ when :string
439
+ decoder.skip_string
440
+ when :int
441
+ decoder.skip_int
442
+ when :long
443
+ decoder.skip_long
444
+ when :float
445
+ decoder.skip_float
446
+ when :double
447
+ decoder.skip_double
448
+ when :bytes
449
+ decoder.skip_bytes
450
+ when :fixed
451
+ skip_fixed(writers_schema, decoder)
452
+ when :enum
453
+ skip_enum(writers_schema, decoder)
454
+ when :array
455
+ skip_array(writers_schema, decoder)
456
+ when :map
457
+ skip_map(writers_schema, decoder)
458
+ when :union
459
+ skip_union(writers_schema, decoder)
460
+ when :record, :error, :request
461
+ skip_record(writers_schema, decoder)
462
+ else
463
+ raise AvroError, "Unknown schema type: #{writers_schema.type}"
464
+ end
465
+ end
466
+
467
+ def skip_fixed(writers_schema, decoder)
468
+ decoder.skip(writers_schema.size)
469
+ end
470
+
471
+ def skip_enum(_writers_schema, decoder)
472
+ decoder.skip_int
473
+ end
474
+
475
+ def skip_union(writers_schema, decoder)
476
+ index = decoder.read_long
477
+ skip_data(writers_schema.schemas[index], decoder)
478
+ end
479
+
480
+ def skip_array(writers_schema, decoder)
481
+ skip_blocks(decoder) { skip_data(writers_schema.items, decoder) }
482
+ end
483
+
484
+ def skip_map(writers_schema, decoder)
485
+ skip_blocks(decoder) {
486
+ decoder.skip_string
487
+ skip_data(writers_schema.values, decoder)
488
+ }
489
+ end
490
+
491
+ def skip_record(writers_schema, decoder)
492
+ writers_schema.fields.each{|f| skip_data(f.type, decoder) }
493
+ end
494
+
495
+ private
496
+ def skip_blocks(decoder, &blk)
497
+ block_count = decoder.read_long
498
+ while block_count != 0
499
+ if block_count < 0
500
+ decoder.skip(decoder.read_long)
501
+ else
502
+ block_count.times(&blk)
503
+ end
504
+ block_count = decoder.read_long
505
+ end
506
+ end
507
+ end # DatumReader
508
+
509
+ # DatumWriter for generic ruby objects
510
+ class DatumWriter
511
+ attr_accessor :writers_schema
512
+ def initialize(writers_schema=nil)
513
+ @writers_schema = writers_schema
514
+ end
515
+
516
+ def write(datum, encoder)
517
+ write_data(writers_schema, datum, encoder)
518
+ end
519
+
520
+ def write_data(writers_schema, logical_datum, encoder)
521
+ datum = writers_schema.type_adapter.encode(logical_datum)
522
+
523
+ unless Schema.validate(writers_schema, datum, { recursive: false, encoded: true })
524
+ raise AvroTypeError.new(writers_schema, datum)
525
+ end
526
+
527
+ # function dispatch to write datum
528
+ case writers_schema.type_sym
529
+ when :null; encoder.write_null(datum)
530
+ when :boolean; encoder.write_boolean(datum)
531
+ when :string; encoder.write_string(datum)
532
+ when :int; encoder.write_int(datum)
533
+ when :long; encoder.write_long(datum)
534
+ when :float; encoder.write_float(datum)
535
+ when :double; encoder.write_double(datum)
536
+ when :bytes; encoder.write_bytes(datum)
537
+ when :fixed; write_fixed(writers_schema, datum, encoder)
538
+ when :enum; write_enum(writers_schema, datum, encoder)
539
+ when :array; write_array(writers_schema, datum, encoder)
540
+ when :map; write_map(writers_schema, datum, encoder)
541
+ when :union; write_union(writers_schema, datum, encoder)
542
+ when :record, :error, :request; write_record(writers_schema, datum, encoder)
543
+ else
544
+ raise AvroError.new("Unknown type: #{writers_schema.type}")
545
+ end
546
+ end
547
+
548
+ def write_fixed(_writers_schema, datum, encoder)
549
+ encoder.write(datum)
550
+ end
551
+
552
+ def write_enum(writers_schema, datum, encoder)
553
+ index_of_datum = writers_schema.symbols.index(datum)
554
+ encoder.write_int(index_of_datum)
555
+ end
556
+
557
+ def write_array(writers_schema, datum, encoder)
558
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Array)
559
+ if datum.size > 0
560
+ encoder.write_long(datum.size)
561
+ datum.each do |item|
562
+ write_data(writers_schema.items, item, encoder)
563
+ end
564
+ end
565
+ encoder.write_long(0)
566
+ end
567
+
568
+ def write_map(writers_schema, datum, encoder)
569
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
570
+ if datum.size > 0
571
+ encoder.write_long(datum.size)
572
+ datum.each do |k,v|
573
+ encoder.write_string(k)
574
+ write_data(writers_schema.values, v, encoder)
575
+ end
576
+ end
577
+ encoder.write_long(0)
578
+ end
579
+
580
+ def write_union(writers_schema, datum, encoder)
581
+ index_of_schema = -1
582
+ found = writers_schema.schemas.
583
+ find{|e| index_of_schema += 1; found = Schema.validate(e, datum) }
584
+ unless found # Because find_index doesn't exist in 1.8.6
585
+ raise AvroTypeError.new(writers_schema, datum)
586
+ end
587
+ encoder.write_long(index_of_schema)
588
+ write_data(writers_schema.schemas[index_of_schema], datum, encoder)
589
+ end
590
+
591
+ def write_record(writers_schema, datum, encoder)
592
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
593
+ writers_schema.fields.each do |field|
594
+ write_data(field.type, datum[field.name], encoder)
595
+ end
596
+ end
597
+ end # DatumWriter
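+ # Minimal round-trip sketch using the classes above (added comment,
+ # illustrative only):
+ #   schema = Avro::Schema.parse('"string"')
+ #   buffer = StringIO.new
+ #   Avro::IO::DatumWriter.new(schema).write('hello', Avro::IO::BinaryEncoder.new(buffer))
+ #   buffer.rewind
+ #   Avro::IO::DatumReader.new(schema).read(Avro::IO::BinaryDecoder.new(buffer))  # => "hello"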
598
+ end
599
+ end
data/lib/avro/ipc.rb ADDED
@@ -0,0 +1,551 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require "net/http"
18
+
19
+ module Avro::IPC
20
+
21
+ class AvroRemoteError < Avro::AvroError; end
22
+
23
+ HANDSHAKE_REQUEST_SCHEMA = Avro::Schema.parse <<-JSON
24
+ {
25
+ "type": "record",
26
+ "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
27
+ "fields": [
28
+ {"name": "clientHash",
29
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
30
+ {"name": "clientProtocol", "type": ["null", "string"]},
31
+ {"name": "serverHash", "type": "MD5"},
32
+ {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
33
+ ]
34
+ }
35
+ JSON
36
+
37
+ HANDSHAKE_RESPONSE_SCHEMA = Avro::Schema.parse <<-JSON
38
+ {
39
+ "type": "record",
40
+ "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
41
+ "fields": [
42
+ {"name": "match",
43
+ "type": {"type": "enum", "name": "HandshakeMatch",
44
+ "symbols": ["BOTH", "CLIENT", "NONE"]}},
45
+ {"name": "serverProtocol", "type": ["null", "string"]},
46
+ {"name": "serverHash",
47
+ "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
48
+ {"name": "meta",
49
+ "type": ["null", {"type": "map", "values": "bytes"}]}
50
+ ]
51
+ }
52
+ JSON
53
+
54
+ HANDSHAKE_REQUESTOR_WRITER = Avro::IO::DatumWriter.new(HANDSHAKE_REQUEST_SCHEMA)
55
+ HANDSHAKE_REQUESTOR_READER = Avro::IO::DatumReader.new(HANDSHAKE_RESPONSE_SCHEMA)
56
+ HANDSHAKE_RESPONDER_WRITER = Avro::IO::DatumWriter.new(HANDSHAKE_RESPONSE_SCHEMA)
57
+ HANDSHAKE_RESPONDER_READER = Avro::IO::DatumReader.new(HANDSHAKE_REQUEST_SCHEMA)
58
+
59
+ META_SCHEMA = Avro::Schema.parse('{"type": "map", "values": "bytes"}')
60
+ META_WRITER = Avro::IO::DatumWriter.new(META_SCHEMA)
61
+ META_READER = Avro::IO::DatumReader.new(META_SCHEMA)
62
+
63
+ SYSTEM_ERROR_SCHEMA = Avro::Schema.parse('["string"]')
64
+
65
+ # protocol cache
66
+ REMOTE_HASHES = {}
67
+ REMOTE_PROTOCOLS = {}
68
+
69
+ BUFFER_HEADER_LENGTH = 4
70
+ BUFFER_SIZE = 8192
71
+
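+ # Summary of the handshake implemented by Requestor below (added comment):
+ # each request starts with a HandshakeRequest carrying the MD5 of the
+ # client's protocol and its best guess of the server's hash. The response's
+ # 'match' field is BOTH (proceed), CLIENT (the client caches the server's
+ # protocol/hash from the response and proceeds), or NONE (the client caches
+ # the server's protocol/hash and retries the request, this time sending its
+ # full protocol text).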
72
+ # Raised when an error message is sent by an Avro requestor or responder.
73
+ class AvroRemoteException < Avro::AvroError; end
74
+
75
+ class ConnectionClosedException < Avro::AvroError; end
76
+
77
+ # Base class for the client side of a protocol interaction.
78
+ class Requestor
79
+ attr_reader :local_protocol, :transport, :remote_protocol, :remote_hash
80
+ attr_accessor :send_protocol
81
+
82
+ def initialize(local_protocol, transport)
83
+ @local_protocol = local_protocol
84
+ @transport = transport
85
+ @remote_protocol = nil
86
+ @remote_hash = nil
87
+ @send_protocol = nil
88
+ end
89
+
90
+ def remote_protocol=(new_remote_protocol)
91
+ @remote_protocol = new_remote_protocol
92
+ REMOTE_PROTOCOLS[transport.remote_name] = remote_protocol
93
+ end
94
+
95
+ def remote_hash=(new_remote_hash)
96
+ @remote_hash = new_remote_hash
97
+ REMOTE_HASHES[transport.remote_name] = remote_hash
98
+ end
99
+
100
+ def request(message_name, request_datum)
101
+ # Writes a request message and reads a response or error message.
102
+ # build handshake and call request
103
+ buffer_writer = StringIO.new(''.force_encoding('BINARY'))
104
+ buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
105
+ write_handshake_request(buffer_encoder)
106
+ write_call_request(message_name, request_datum, buffer_encoder)
107
+
108
+ # send the handshake and call request; block until call response
109
+ call_request = buffer_writer.string
110
+ call_response = transport.transceive(call_request)
111
+
112
+ # process the handshake and call response
113
+ buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_response))
114
+ if read_handshake_response(buffer_decoder)
115
+ read_call_response(message_name, buffer_decoder)
116
+ else
117
+ request(message_name, request_datum)
118
+ end
119
+ end
120
+
121
+ def write_handshake_request(encoder)
122
+ local_hash = local_protocol.md5
123
+ remote_name = transport.remote_name
124
+ remote_hash = REMOTE_HASHES[remote_name]
125
+ unless remote_hash
126
+ remote_hash = local_hash
127
+ self.remote_protocol = local_protocol
128
+ end
129
+ request_datum = {
130
+ 'clientHash' => local_hash,
131
+ 'serverHash' => remote_hash
132
+ }
133
+ if send_protocol
134
+ request_datum['clientProtocol'] = local_protocol.to_s
135
+ end
136
+ HANDSHAKE_REQUESTOR_WRITER.write(request_datum, encoder)
137
+ end
138
+
139
+ def write_call_request(message_name, request_datum, encoder)
140
+ # The format of a call request is:
141
+ # * request metadata, a map with values of type bytes
142
+ # * the message name, an Avro string, followed by
143
+ # * the message parameters. Parameters are serialized according to
144
+ # the message's request declaration.
145
+
146
+ # TODO request metadata (not yet implemented)
147
+ request_metadata = {}
148
+ META_WRITER.write(request_metadata, encoder)
149
+
150
+ message = local_protocol.messages[message_name]
151
+ unless message
152
+ raise AvroError, "Unknown message: #{message_name}"
153
+ end
154
+ encoder.write_string(message.name)
155
+
156
+ write_request(message.request, request_datum, encoder)
157
+ end
158
+
159
+ def write_request(request_schema, request_datum, encoder)
160
+ datum_writer = Avro::IO::DatumWriter.new(request_schema)
161
+ datum_writer.write(request_datum, encoder)
162
+ end
163
+
164
+ def read_handshake_response(decoder)
165
+ handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
166
+ we_have_matching_schema = false
167
+
168
+ case handshake_response['match']
169
+ when 'BOTH'
170
+ self.send_protocol = false
171
+ we_have_matching_schema = true
172
+ when 'CLIENT'
173
+ raise AvroError.new('Handshake failure. match == CLIENT') if send_protocol
174
+ self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
175
+ self.remote_hash = handshake_response['serverHash']
176
+ self.send_protocol = false
177
+ we_have_matching_schema = true
178
+ when 'NONE'
179
+ raise AvroError.new('Handshake failure. match == NONE') if send_protocol
180
+ self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
181
+ self.remote_hash = handshake_response['serverHash']
182
+ self.send_protocol = true
183
+ else
184
+ raise AvroError.new("Unexpected match: #{match}")
185
+ end
186
+
187
+ return we_have_matching_schema
188
+ end
189
+
190
+ def read_call_response(message_name, decoder)
191
+ # The format of a call response is:
192
+ # * response metadata, a map with values of type bytes
193
+ # * a one-byte error flag boolean, followed by either:
194
+ # * if the error flag is false,
195
+ # the message response, serialized per the message's response schema.
196
+ # * if the error flag is true,
197
+ # the error, serialized per the message's error union schema.
198
+ _response_metadata = META_READER.read(decoder)
199
+
200
+ # remote response schema
201
+ remote_message_schema = remote_protocol.messages[message_name]
202
+ raise AvroError.new("Unknown remote message: #{message_name}") unless remote_message_schema
203
+
204
+ # local response schema
205
+ local_message_schema = local_protocol.messages[message_name]
206
+ unless local_message_schema
207
+ raise AvroError.new("Unknown local message: #{message_name}")
208
+ end
209
+
210
+ # error flag
211
+ if !decoder.read_boolean
212
+ writers_schema = remote_message_schema.response
213
+ readers_schema = local_message_schema.response
214
+ read_response(writers_schema, readers_schema, decoder)
215
+ else
216
+ writers_schema = remote_message_schema.errors || SYSTEM_ERROR_SCHEMA
217
+ readers_schema = local_message_schema.errors || SYSTEM_ERROR_SCHEMA
218
+ raise read_error(writers_schema, readers_schema, decoder)
219
+ end
220
+ end
221
+
222
+ def read_response(writers_schema, readers_schema, decoder)
223
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
224
+ datum_reader.read(decoder)
225
+ end
226
+
227
+ def read_error(writers_schema, readers_schema, decoder)
228
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
229
+ AvroRemoteError.new(datum_reader.read(decoder))
230
+ end
231
+ end
232
+
233
+ # Base class for the server side of a protocol interaction.
234
+ class Responder
235
+ attr_reader :local_protocol, :local_hash, :protocol_cache
236
+ def initialize(local_protocol)
237
+ @local_protocol = local_protocol
238
+ @local_hash = self.local_protocol.md5
239
+ @protocol_cache = {}
240
+ protocol_cache[local_hash] = local_protocol
241
+ end
242
+
243
+ # Called by a server to deserialize a request, compute and serialize
244
+ # a response or error. Compare to 'handle()' in Thrift.
245
+ def respond(call_request, transport=nil)
246
+ buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
247
+ buffer_writer = StringIO.new(''.force_encoding('BINARY'))
248
+ buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
249
+ error = nil
250
+ response_metadata = {}
251
+
252
+ begin
253
+ remote_protocol = process_handshake(buffer_decoder, buffer_encoder, transport)
254
+ # handshake failure
255
+ unless remote_protocol
256
+ return buffer_writer.string
257
+ end
258
+
259
+ # read request using remote protocol
260
+ _request_metadata = META_READER.read(buffer_decoder)
261
+ remote_message_name = buffer_decoder.read_string
262
+
263
+ # get remote and local request schemas so we can do
264
+ # schema resolution (one fine day)
265
+ remote_message = remote_protocol.messages[remote_message_name]
266
+ unless remote_message
267
+ raise AvroError.new("Unknown remote message: #{remote_message_name}")
268
+ end
269
+ local_message = local_protocol.messages[remote_message_name]
270
+ unless local_message
271
+ raise AvroError.new("Unknown local message: #{remote_message_name}")
272
+ end
273
+ writers_schema = remote_message.request
274
+ readers_schema = local_message.request
275
+ request = read_request(writers_schema, readers_schema, buffer_decoder)
276
+ # perform server logic
277
+ begin
278
+ response = call(local_message, request)
279
+ rescue AvroRemoteError => e
280
+ error = e
281
+ rescue Exception => e # rubocop:disable Lint/RescueException
282
+ error = AvroRemoteError.new(e.to_s)
283
+ end
284
+
285
+ # write response using local protocol
286
+ META_WRITER.write(response_metadata, buffer_encoder)
287
+ buffer_encoder.write_boolean(!!error)
288
+ if error.nil?
289
+ writers_schema = local_message.response
290
+ write_response(writers_schema, response, buffer_encoder)
291
+ else
292
+ writers_schema = local_message.errors || SYSTEM_ERROR_SCHEMA
293
+ write_error(writers_schema, error, buffer_encoder)
294
+ end
295
+ rescue Avro::AvroError => e
296
+ error = AvroRemoteException.new(e.to_s)
297
+ # TODO does the stuff written here ever get used?
298
+ buffer_encoder = Avro::IO::BinaryEncoder.new(StringIO.new)
299
+ META_WRITER.write(response_metadata, buffer_encoder)
300
+ buffer_encoder.write_boolean(true)
301
+ self.write_error(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
302
+ end
303
+ buffer_writer.string
304
+ end
305
+
306
+ def process_handshake(decoder, encoder, connection=nil)
307
+ if connection && connection.is_connected?
308
+ return connection.protocol
309
+ end
310
+ handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
311
+ handshake_response = {}
312
+
313
+ # determine the remote protocol
314
+ client_hash = handshake_request['clientHash']
315
+ client_protocol = handshake_request['clientProtocol']
316
+ remote_protocol = protocol_cache[client_hash]
317
+
318
+ if !remote_protocol && client_protocol
319
+ remote_protocol = Avro::Protocol.parse(client_protocol)
320
+ protocol_cache[client_hash] = remote_protocol
321
+ end
322
+
323
+ # evaluate remote's guess of the local protocol
324
+ server_hash = handshake_request['serverHash']
325
+ if local_hash == server_hash
326
+ if !remote_protocol
327
+ handshake_response['match'] = 'NONE'
328
+ else
329
+ handshake_response['match'] = 'BOTH'
330
+ end
331
+ else
332
+ if !remote_protocol
333
+ handshake_response['match'] = 'NONE'
334
+ else
335
+ handshake_response['match'] = 'CLIENT'
336
+ end
337
+ end
338
+
339
+ if handshake_response['match'] != 'BOTH'
340
+ handshake_response['serverProtocol'] = local_protocol.to_s
341
+ handshake_response['serverHash'] = local_hash
342
+ end
343
+
344
+ HANDSHAKE_RESPONDER_WRITER.write(handshake_response, encoder)
345
+
346
+ if connection && handshake_response['match'] != 'NONE'
347
+ connection.protocol = remote_protocol
348
+ end
349
+
350
+ remote_protocol
351
+ end
352
+
353
+ def call(_local_message, _request)
354
+ # Actual work done by server: cf. handler in thrift.
355
+ raise NotImplementedError
356
+ end
357
+
358
+ def read_request(writers_schema, readers_schema, decoder)
359
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
360
+ datum_reader.read(decoder)
361
+ end
362
+
363
+ def write_response(writers_schema, response_datum, encoder)
364
+ datum_writer = Avro::IO::DatumWriter.new(writers_schema)
365
+ datum_writer.write(response_datum, encoder)
366
+ end
367
+
368
+ def write_error(writers_schema, error_exception, encoder)
369
+ datum_writer = Avro::IO::DatumWriter.new(writers_schema)
370
+ datum_writer.write(error_exception.to_s, encoder)
371
+ end
372
+ end
373
+
374
+ class SocketTransport
375
+ # A simple socket-based Transport implementation.
376
+
377
+ attr_reader :sock, :remote_name
378
+ attr_accessor :protocol
379
+
380
+ def initialize(sock)
381
+ @sock = sock
382
+ @protocol = nil
383
+ end
384
+
385
+ def is_connected?()
386
+ !!@protocol
387
+ end
388
+
389
+ def transceive(request)
390
+ write_framed_message(request)
391
+ read_framed_message
392
+ end
393
+
394
+ def read_framed_message
395
+ message = []
396
+ loop do
397
+ buffer = StringIO.new(''.force_encoding('BINARY'))
398
+ buffer_length = read_buffer_length
399
+ if buffer_length == 0
400
+ return message.join
401
+ end
402
+ while buffer.tell < buffer_length
403
+ chunk = sock.read(buffer_length - buffer.tell)
404
+ if chunk == ''
405
+ raise ConnectionClosedException.new("Socket read 0 bytes.")
406
+ end
407
+ buffer.write(chunk)
408
+ end
409
+ message << buffer.string
410
+ end
411
+ end
412
+
413
+ def write_framed_message(message)
414
+ message_length = message.bytesize
415
+ total_bytes_sent = 0
416
+ while message_length - total_bytes_sent > 0
417
+ if message_length - total_bytes_sent > BUFFER_SIZE
418
+ buffer_length = BUFFER_SIZE
419
+ else
420
+ buffer_length = message_length - total_bytes_sent
421
+ end
422
+ write_buffer(message[total_bytes_sent,buffer_length])
423
+ total_bytes_sent += buffer_length
424
+ end
425
+ # A message is always terminated by a zero-length buffer.
426
+ write_buffer_length(0)
427
+ end
428
+
429
+ def write_buffer(chunk)
430
+ buffer_length = chunk.bytesize
431
+ write_buffer_length(buffer_length)
432
+ total_bytes_sent = 0
433
+ while total_bytes_sent < buffer_length
434
+ bytes_sent = self.sock.write(chunk[total_bytes_sent..-1])
435
+ if bytes_sent == 0
436
+ raise ConnectionClosedException.new("Socket sent 0 bytes.")
437
+ end
438
+ total_bytes_sent += bytes_sent
439
+ end
440
+ end
441
+
442
+ def write_buffer_length(n)
443
+ bytes_sent = sock.write([n].pack('N'))
444
+ if bytes_sent == 0
445
+ raise ConnectionClosedException.new("socket sent 0 bytes")
446
+ end
447
+ end
448
+
449
+ def read_buffer_length
450
+ read = sock.read(BUFFER_HEADER_LENGTH)
451
+ if read == '' || read == nil
452
+ raise ConnectionClosedException.new("Socket read 0 bytes.")
453
+ end
454
+ read.unpack('N')[0]
455
+ end
456
+
457
+ def close
458
+ sock.close
459
+ end
460
+ end
461
+
462
+ class ConnectionClosedError < StandardError; end
463
+
464
+ class FramedWriter
465
+ attr_reader :writer
466
+ def initialize(writer)
467
+ @writer = writer
468
+ end
469
+
470
+ def write_framed_message(message)
471
+ message_size = message.bytesize
472
+ total_bytes_sent = 0
473
+ while message_size - total_bytes_sent > 0
474
+ if message_size - total_bytes_sent > BUFFER_SIZE
475
+ buffer_size = BUFFER_SIZE
476
+ else
477
+ buffer_size = message_size - total_bytes_sent
478
+ end
479
+ write_buffer(message[total_bytes_sent, buffer_size])
480
+ total_bytes_sent += buffer_size
481
+ end
482
+ write_buffer_size(0)
483
+ end
484
+
485
+ def to_s; writer.string; end
486
+
487
+ private
488
+ def write_buffer(chunk)
489
+ buffer_size = chunk.bytesize
490
+ write_buffer_size(buffer_size)
491
+ writer << chunk
492
+ end
493
+
494
+ def write_buffer_size(n)
495
+ writer.write([n].pack('N'))
496
+ end
497
+ end
498
+
499
+ class FramedReader
500
+ attr_reader :reader
501
+
502
+ def initialize(reader)
503
+ @reader = reader
504
+ end
505
+
506
+ def read_framed_message
507
+ message = []
508
+ loop do
509
+ buffer = ''.force_encoding('BINARY')
510
+ buffer_size = read_buffer_size
511
+
512
+ return message.join if buffer_size == 0
513
+
514
+ while buffer.bytesize < buffer_size
515
+ chunk = reader.read(buffer_size - buffer.bytesize)
516
+ chunk_error?(chunk)
517
+ buffer << chunk
518
+ end
519
+ message << buffer
520
+ end
521
+ end
522
+
523
+ private
524
+ def read_buffer_size
525
+ header = reader.read(BUFFER_HEADER_LENGTH)
526
+ chunk_error?(header)
527
+ header.unpack('N')[0]
528
+ end
529
+
530
+ def chunk_error?(chunk)
531
+ raise ConnectionClosedError.new("Reader read 0 bytes") if chunk == ''
532
+ end
533
+ end
534
+
535
+ # Only works for clients. Sigh.
536
+ class HTTPTransceiver
537
+ attr_reader :remote_name, :host, :port
538
+ def initialize(host, port)
539
+ @host, @port = host, port
540
+ @remote_name = "#{host}:#{port}"
541
+ @conn = Net::HTTP.start host, port
542
+ end
543
+
544
+ def transceive(message)
545
+ writer = FramedWriter.new(StringIO.new(''.force_encoding('BINARY')))
546
+ writer.write_framed_message(message)
547
+ resp = @conn.post('/', writer.to_s, {'Content-Type' => 'avro/binary'})
548
+ FramedReader.new(StringIO.new(resp.body)).read_framed_message
549
+ end
550
+ end
551
+ end
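A minimal, hypothetical usage sketch for the IPC classes above, not part of the gem itself: it assumes the enclosing module is Avro::IPC, that a compatible Responder is already serving avro/binary POST requests on localhost:8080, and it uses a made-up one-message Echo protocol. The last lines show the length-prefixed framing implemented by FramedWriter and FramedReader.

    require 'avro'
    require 'stringio'

    # Hypothetical protocol with a single "echo" message.
    ECHO_PROTOCOL = Avro::Protocol.parse(<<-JSON)
    {
      "protocol": "Echo",
      "namespace": "example.ipc",
      "types": [],
      "messages": {
        "echo": {
          "request": [{"name": "text", "type": "string"}],
          "response": "string"
        }
      }
    }
    JSON

    # Client side: handshake plus call, transported over HTTP (endpoint assumed).
    transceiver = Avro::IPC::HTTPTransceiver.new('localhost', 8080)
    requestor   = Avro::IPC::Requestor.new(ECHO_PROTOCOL, transceiver)
    puts requestor.request('echo', 'text' => 'hello')

    # Framing round trip: each buffer is prefixed by a 4-byte big-endian length,
    # and a message is terminated by a zero-length buffer.
    writer = Avro::IPC::FramedWriter.new(StringIO.new(''.force_encoding('BINARY')))
    writer.write_framed_message('ping')
    Avro::IPC::FramedReader.new(StringIO.new(writer.to_s)).read_framed_message  # => "ping"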
data/lib/avro/logical_types.rb ADDED
@@ -0,0 +1,90 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'date'
19
+
20
+ module Avro
21
+ module LogicalTypes
22
+ module IntDate
23
+ EPOCH_START = Date.new(1970, 1, 1)
24
+
25
+ def self.encode(date)
26
+ return date.to_i if date.is_a?(Numeric)
27
+
28
+ (date - EPOCH_START).to_i
29
+ end
30
+
31
+ def self.decode(int)
32
+ EPOCH_START + int
33
+ end
34
+ end
35
+
36
+ module TimestampMillis
37
+ def self.encode(value)
38
+ return value.to_i if value.is_a?(Numeric)
39
+
40
+ time = value.to_time
41
+ time.to_i * 1000 + time.usec / 1000
42
+ end
43
+
44
+ def self.decode(int)
45
+ s, ms = int / 1000, int % 1000
46
+ Time.at(s, ms * 1000).utc
47
+ end
48
+ end
49
+
50
+ module TimestampMicros
51
+ def self.encode(value)
52
+ return value.to_i if value.is_a?(Numeric)
53
+
54
+ time = value.to_time
55
+ time.to_i * 1000_000 + time.usec
56
+ end
57
+
58
+ def self.decode(int)
59
+ s, us = int / 1000_000, int % 1000_000
60
+ Time.at(s, us).utc
61
+ end
62
+ end
63
+
64
+ module Identity
65
+ def self.encode(datum)
66
+ datum
67
+ end
68
+
69
+ def self.decode(datum)
70
+ datum
71
+ end
72
+ end
73
+
74
+ TYPES = {
75
+ "int" => {
76
+ "date" => IntDate
77
+ },
78
+ "long" => {
79
+ "timestamp-millis" => TimestampMillis,
80
+ "timestamp-micros" => TimestampMicros
81
+ },
82
+ }.freeze
83
+
84
+ def self.type_adapter(type, logical_type)
85
+ return unless logical_type
86
+
87
+ TYPES.fetch(type, {}.freeze).fetch(logical_type, Identity)
88
+ end
89
+ end
90
+ end
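A quick sketch of the logical-type helpers above; the dates, times, and lookups are illustrative only.

    require 'avro'
    require 'date'

    # Date <-> days since the Unix epoch
    Avro::LogicalTypes::IntDate.encode(Date.new(2020, 1, 1))   # => 18262
    Avro::LogicalTypes::IntDate.decode(18262)                  # => #<Date: 2020-01-01>

    # Time <-> milliseconds since the Unix epoch (TimestampMicros works the same way in microseconds)
    ms = Avro::LogicalTypes::TimestampMillis.encode(Time.utc(2020, 1, 1, 12, 0, 0, 500_000))
    Avro::LogicalTypes::TimestampMillis.decode(ms)             # => 2020-01-01 12:00:00.5 UTC

    # Adapter lookup used by Schema#type_adapter: nil when no logical type is given,
    # Identity when the type/logicalType combination is unknown.
    Avro::LogicalTypes.type_adapter('long', 'timestamp-micros')  # => Avro::LogicalTypes::TimestampMicros
    Avro::LogicalTypes.type_adapter('string', 'uuid')            # => Avro::LogicalTypes::Identity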
data/lib/avro/protocol.rb ADDED
@@ -0,0 +1,165 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class Protocol
19
+ VALID_TYPE_SCHEMA_TYPES = Set.new(%w[enum record error fixed])
20
+ VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
21
+ class ProtocolParseError < Avro::AvroError; end
22
+
23
+ attr_reader :name, :namespace, :types, :messages, :md5, :doc
24
+ def self.parse(protocol_string)
25
+ json_data = MultiJson.load(protocol_string)
26
+
27
+ if json_data.is_a? Hash
28
+ name = json_data['protocol']
29
+ namespace = json_data['namespace']
30
+ types = json_data['types']
31
+ messages = json_data['messages']
32
+ doc = json_data['doc']
33
+ Protocol.new(name, namespace, types, messages, doc)
34
+ else
35
+ raise ProtocolParseError, "Not a JSON object: #{json_data}"
36
+ end
37
+ end
38
+
39
+ def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil)
40
+ # Ensure valid ctor args
41
+ if !name
42
+ raise ProtocolParseError, 'Protocols must have a non-empty name.'
43
+ elsif !name.is_a?(String)
44
+ raise ProtocolParseError, 'The name property must be a string.'
45
+ elsif !namespace.is_a?(String)
46
+ raise ProtocolParseError, 'The namespace property must be a string.'
47
+ elsif !types.is_a?(Array)
48
+ raise ProtocolParseError, 'The types property must be a list.'
49
+ elsif !messages.is_a?(Hash)
50
+ raise ProtocolParseError, 'The messages property must be a JSON object.'
51
+ end
52
+
53
+ @name = name
54
+ @namespace = namespace
55
+ type_names = {}
56
+ @types = parse_types(types, type_names)
57
+ @messages = parse_messages(messages, type_names)
58
+ @md5 = Digest::MD5.digest(to_s)
59
+ @doc = doc
60
+ end
61
+
62
+ def to_s
63
+ MultiJson.dump to_avro
64
+ end
65
+
66
+ def ==(other)
67
+ to_avro == other.to_avro
68
+ end
69
+
70
+ private
71
+ def parse_types(types, type_names)
72
+ types.collect do |type|
73
+ # FIXME adding type.name to type_names is not defined in the
74
+ # spec. Possible bug in the python impl and the spec.
75
+ type_object = Schema.real_parse(type, type_names, namespace)
76
+ unless VALID_TYPE_SCHEMA_TYPES_SYM.include?(type_object.type_sym)
77
+ msg = "Type #{type} not an enum, record, fixed or error."
78
+ raise ProtocolParseError, msg
79
+ end
80
+ type_object
81
+ end
82
+ end
83
+
84
+ def parse_messages(messages, names)
85
+ message_objects = {}
86
+ messages.each do |name, body|
87
+ if message_objects.has_key?(name)
88
+ raise ProtocolParseError, "Message name \"#{name}\" repeated."
89
+ elsif !body.is_a?(Hash)
90
+ raise ProtocolParseError, "Message name \"#{name}\" has non-object body #{body.inspect}"
91
+ end
92
+
93
+ request = body['request']
94
+ response = body['response']
95
+ errors = body['errors']
96
+ doc = body['doc']
97
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc)
98
+ end
99
+ message_objects
100
+ end
101
+
102
+ protected
103
+ def to_avro(names=Set.new)
104
+ hsh = {'protocol' => name}
105
+ hsh['namespace'] = namespace if namespace
106
+ hsh['types'] = types.map{|t| t.to_avro(names) } if types
107
+
108
+ if messages
109
+ hsh['messages'] = messages.inject({}) {|h, (k,t)| h[k] = t.to_avro(names); h }
110
+ end
111
+
112
+ hsh
113
+ end
114
+
115
+ class Message
116
+ attr_reader :name, :request, :response, :errors, :default_namespace, :doc
117
+
118
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil)
119
+ @name = name
120
+ @default_namespace = default_namespace
121
+ @request = parse_request(request, names)
122
+ @response = parse_response(response, names)
123
+ @errors = parse_errors(errors, names) if errors
124
+ @doc = doc
125
+ end
126
+
127
+ def to_avro(names=Set.new)
128
+ {
129
+ 'request' => request.to_avro(names),
130
+ 'response' => response.to_avro(names)
131
+ }.tap do |hash|
132
+ hash['errors'] = errors.to_avro(names) if errors
133
+ hash['doc'] = @doc if @doc
134
+ end
135
+ end
136
+
137
+ def to_s
138
+ MultiJson.dump to_avro
139
+ end
140
+
141
+ def parse_request(request, names)
142
+ unless request.is_a?(Array)
143
+ raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
144
+ end
145
+ Schema::RecordSchema.new(nil, default_namespace, request, names, :request)
146
+ end
147
+
148
+ def parse_response(response, names)
149
+ if response.is_a?(String) && names
150
+ fullname = Name.make_fullname(response, default_namespace)
151
+ return names[fullname] if names.include?(fullname)
152
+ end
153
+
154
+ Schema.real_parse(response, names, default_namespace)
155
+ end
156
+
157
+ def parse_errors(errors, names)
158
+ unless errors.is_a?(Array)
159
+ raise ProtocolParseError, "Errors property not an Array: #{errors}"
160
+ end
161
+ Schema.real_parse(errors, names, default_namespace)
162
+ end
163
+ end
164
+ end
165
+ end
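A hypothetical protocol run through Avro::Protocol.parse above; the Mail protocol is adapted from the Avro specification examples, and all names are illustrative.

    require 'avro'

    protocol = Avro::Protocol.parse(<<-JSON)
    {
      "protocol": "Mail",
      "namespace": "example.proto",
      "types": [
        {"name": "Message", "type": "record", "fields": [
          {"name": "to",   "type": "string"},
          {"name": "from", "type": "string"},
          {"name": "body", "type": "string"}
        ]}
      ],
      "messages": {
        "send": {
          "request": [{"name": "message", "type": "Message"}],
          "response": "string"
        }
      }
    }
    JSON

    protocol.name                               # => "Mail"
    protocol.namespace                          # => "example.proto"
    protocol.messages['send'].response.type_sym # => :string
    protocol.md5.bytesize                       # => 16, raw MD5 digest of the canonical JSON form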
data/lib/avro/schema.rb ADDED
@@ -0,0 +1,523 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'avro/logical_types'
18
+
19
+ module Avro
20
+ class Schema
21
+ # Sets of strings, for backwards compatibility. See below for sets of symbols,
22
+ # for better performance.
23
+ PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
24
+ NAMED_TYPES = Set.new(%w[fixed enum record error])
25
+
26
+ VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
27
+
28
+ PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym))
29
+ NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
30
+ VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
31
+
32
+ NAME_REGEX = /^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/
33
+
34
+ INT_MIN_VALUE = -(1 << 31)
35
+ INT_MAX_VALUE = (1 << 31) - 1
36
+ LONG_MIN_VALUE = -(1 << 63)
37
+ LONG_MAX_VALUE = (1 << 63) - 1
38
+
39
+ def self.parse(json_string)
40
+ real_parse(MultiJson.load(json_string), {})
41
+ end
42
+
43
+ # Build Avro Schema from data parsed out of JSON string.
44
+ def self.real_parse(json_obj, names=nil, default_namespace=nil)
45
+ if json_obj.is_a? Hash
46
+ type = json_obj['type']
47
+ logical_type = json_obj['logicalType']
48
+ raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
49
+
50
+ # Check that the type is valid before calling #to_sym, since symbols are never garbage
51
+ # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
52
+ unless VALID_TYPES.include?(type)
53
+ raise SchemaParseError, "Unknown type: #{type}"
54
+ end
55
+
56
+ type_sym = type.to_sym
57
+ if PRIMITIVE_TYPES_SYM.include?(type_sym)
58
+ case type_sym
59
+ when :bytes
60
+ precision = json_obj['precision']
61
+ scale = json_obj['scale']
62
+ return BytesSchema.new(type_sym, logical_type, precision, scale)
63
+ else
64
+ return PrimitiveSchema.new(type_sym, logical_type)
65
+ end
66
+ elsif NAMED_TYPES_SYM.include? type_sym
67
+ name = json_obj['name']
68
+ if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
69
+ raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
70
+ end
71
+ namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
72
+ case type_sym
73
+ when :fixed
74
+ size = json_obj['size']
75
+ return FixedSchema.new(name, namespace, size, names, logical_type)
76
+ when :enum
77
+ symbols = json_obj['symbols']
78
+ doc = json_obj['doc']
79
+ return EnumSchema.new(name, namespace, symbols, names, doc)
80
+ when :record, :error
81
+ fields = json_obj['fields']
82
+ doc = json_obj['doc']
83
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
84
+ else
85
+ raise SchemaParseError.new("Unknown named type: #{type}")
86
+ end
87
+
88
+ else
89
+ case type_sym
90
+ when :array
91
+ return ArraySchema.new(json_obj['items'], names, default_namespace)
92
+ when :map
93
+ return MapSchema.new(json_obj['values'], names, default_namespace)
94
+ else
95
+ raise SchemaParseError.new("Unknown Valid Type: #{type}")
96
+ end
97
+ end
98
+
99
+ elsif json_obj.is_a? Array
100
+ # JSON array (union)
101
+ return UnionSchema.new(json_obj, names, default_namespace)
102
+ elsif PRIMITIVE_TYPES.include? json_obj
103
+ return PrimitiveSchema.new(json_obj)
104
+ else
105
+ raise UnknownSchemaError.new(json_obj)
106
+ end
107
+ end
108
+
109
+ # Determine if a ruby datum is an instance of a schema
110
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
111
+ SchemaValidator.validate!(expected_schema, logical_datum, options)
112
+ true
113
+ rescue SchemaValidator::ValidationError
114
+ false
115
+ end
116
+
117
+ def initialize(type, logical_type=nil)
118
+ @type_sym = type.is_a?(Symbol) ? type : type.to_sym
119
+ @logical_type = logical_type
120
+ end
121
+
122
+ attr_reader :type_sym
123
+ attr_reader :logical_type
124
+
125
+ # Returns the type as a string (rather than a symbol), for backwards compatibility.
126
+ # Deprecated in favor of {#type_sym}.
127
+ def type; @type_sym.to_s; end
128
+
129
+ def type_adapter
130
+ @type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
131
+ end
132
+
133
+ # Returns the MD5 fingerprint of the schema as an Integer.
134
+ def md5_fingerprint
135
+ parsing_form = SchemaNormalization.to_parsing_form(self)
136
+ Digest::MD5.hexdigest(parsing_form).to_i(16)
137
+ end
138
+
139
+ # Returns the SHA-256 fingerprint of the schema as an Integer.
140
+ def sha256_fingerprint
141
+ parsing_form = SchemaNormalization.to_parsing_form(self)
142
+ Digest::SHA256.hexdigest(parsing_form).to_i(16)
143
+ end
144
+
145
+ CRC_EMPTY = 0xc15d213aa4d7a795
146
+
147
+ # The Java library caches this value after it is initialized, so this pattern
148
+ # mimics that.
149
+ @@fp_table = nil
150
+ def initFPTable
151
+ @@fp_table = Array.new(256)
152
+ 256.times do |i|
153
+ fp = i
154
+ 8.times do
155
+ fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
156
+ end
157
+ @@fp_table[i] = fp
158
+ end
159
+ end
160
+
161
+ def crc_64_avro_fingerprint
162
+ parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
163
+ data_bytes = parsing_form.unpack("C*")
164
+
165
+ initFPTable unless @@fp_table
166
+
167
+ fp = CRC_EMPTY
168
+ data_bytes.each do |b|
169
+ fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
170
+ end
171
+ fp
172
+ end
173
+
174
+ SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
175
+ def single_object_encoding_header
176
+ [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
177
+ end
178
+ def single_object_schema_fingerprint
179
+ working = crc_64_avro_fingerprint
180
+ bytes = Array.new(8)
181
+ 8.times do |i|
182
+ bytes[7 - i] = (working & 0xff)
183
+ working = working >> 8
184
+ end
185
+ bytes
186
+ end
187
+
188
+ def read?(writers_schema)
189
+ SchemaCompatibility.can_read?(writers_schema, self)
190
+ end
191
+
192
+ def be_read?(other_schema)
193
+ other_schema.read?(self)
194
+ end
195
+
196
+ def mutual_read?(other_schema)
197
+ SchemaCompatibility.mutual_read?(other_schema, self)
198
+ end
199
+
200
+ def ==(other, _seen=nil)
201
+ other.is_a?(Schema) && type_sym == other.type_sym
202
+ end
203
+
204
+ def hash(_seen=nil)
205
+ type_sym.hash
206
+ end
207
+
208
+ def subparse(json_obj, names=nil, namespace=nil)
209
+ if json_obj.is_a?(String) && names
210
+ fullname = Name.make_fullname(json_obj, namespace)
211
+ return names[fullname] if names.include?(fullname)
212
+ end
213
+
214
+ begin
215
+ Schema.real_parse(json_obj, names, namespace)
216
+ rescue => e
217
+ raise e if e.is_a? SchemaParseError
218
+ raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
219
+ end
220
+ end
221
+
222
+ def to_avro(_names=nil)
223
+ props = {'type' => type}
224
+ props['logicalType'] = logical_type if logical_type
225
+ props
226
+ end
227
+
228
+ def to_s
229
+ MultiJson.dump to_avro
230
+ end
231
+
232
+ class NamedSchema < Schema
233
+ attr_reader :name, :namespace
234
+
235
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
236
+ super(type, logical_type)
237
+ @name, @namespace = Name.extract_namespace(name, namespace)
238
+ @doc = doc
239
+ Name.add_name(names, self)
240
+ end
241
+
242
+ def to_avro(names=Set.new)
243
+ if @name
244
+ return fullname if names.include?(fullname)
245
+ names << fullname
246
+ end
247
+ props = {'name' => @name}
248
+ props.merge!('namespace' => @namespace) if @namespace
249
+ props.merge!('doc' => @doc) if @doc
250
+ super.merge props
251
+ end
252
+
253
+ def fullname
254
+ @fullname ||= Name.make_fullname(@name, @namespace)
255
+ end
256
+ end
257
+
258
+ class RecordSchema < NamedSchema
259
+ attr_reader :fields, :doc
260
+
261
+ def self.make_field_objects(field_data, names, namespace=nil)
262
+ field_objects, field_names = [], Set.new
263
+ field_data.each do |field|
264
+ if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
265
+ type = field['type']
266
+ name = field['name']
267
+ default = field.key?('default') ? field['default'] : :no_default
268
+ order = field['order']
269
+ doc = field['doc']
270
+ new_field = Field.new(type, name, default, order, names, namespace, doc)
271
+ # make sure field name has not been used yet
272
+ if field_names.include?(new_field.name)
273
+ raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
274
+ end
275
+ field_names << new_field.name
276
+ else
277
+ raise SchemaParseError, "Not a valid field: #{field}"
278
+ end
279
+ field_objects << new_field
280
+ end
281
+ field_objects
282
+ end
283
+
284
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
285
+ if schema_type == :request || schema_type == 'request'
286
+ @type_sym = schema_type.to_sym
287
+ @namespace = namespace
288
+ @name = nil
289
+ @doc = nil
290
+ else
291
+ super(schema_type, name, namespace, names, doc)
292
+ end
293
+ @fields = if fields
294
+ RecordSchema.make_field_objects(fields, names, self.namespace)
295
+ else
296
+ {}
297
+ end
298
+ end
299
+
300
+ def fields_hash
301
+ @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
302
+ end
303
+
304
+ def to_avro(names=Set.new)
305
+ hsh = super
306
+ return hsh unless hsh.is_a?(Hash)
307
+ hsh['fields'] = @fields.map {|f| f.to_avro(names) }
308
+ if type_sym == :request
309
+ hsh['fields']
310
+ else
311
+ hsh
312
+ end
313
+ end
314
+ end
315
+
316
+ class ArraySchema < Schema
317
+ attr_reader :items
318
+
319
+ def initialize(items, names=nil, default_namespace=nil)
320
+ super(:array)
321
+ @items = subparse(items, names, default_namespace)
322
+ end
323
+
324
+ def to_avro(names=Set.new)
325
+ super.merge('items' => items.to_avro(names))
326
+ end
327
+ end
328
+
329
+ class MapSchema < Schema
330
+ attr_reader :values
331
+
332
+ def initialize(values, names=nil, default_namespace=nil)
333
+ super(:map)
334
+ @values = subparse(values, names, default_namespace)
335
+ end
336
+
337
+ def to_avro(names=Set.new)
338
+ super.merge('values' => values.to_avro(names))
339
+ end
340
+ end
341
+
342
+ class UnionSchema < Schema
343
+ attr_reader :schemas
344
+
345
+ def initialize(schemas, names=nil, default_namespace=nil)
346
+ super(:union)
347
+
348
+ @schemas = schemas.each_with_object([]) do |schema, schema_objects|
349
+ new_schema = subparse(schema, names, default_namespace)
350
+ ns_type = new_schema.type_sym
351
+
352
+ if VALID_TYPES_SYM.include?(ns_type) &&
353
+ !NAMED_TYPES_SYM.include?(ns_type) &&
354
+ schema_objects.any?{|o| o.type_sym == ns_type }
355
+ raise SchemaParseError, "#{ns_type} is already in Union"
356
+ elsif ns_type == :union
357
+ raise SchemaParseError, "Unions cannot contain other unions"
358
+ else
359
+ schema_objects << new_schema
360
+ end
361
+ end
362
+ end
363
+
364
+ def to_avro(names=Set.new)
365
+ schemas.map {|schema| schema.to_avro(names) }
366
+ end
367
+ end
368
+
369
+ class EnumSchema < NamedSchema
370
+ attr_reader :symbols, :doc
371
+
372
+ def initialize(name, space, symbols, names=nil, doc=nil)
373
+ if symbols.uniq.length < symbols.length
374
+ fail_msg = "Duplicate symbol: #{symbols}"
375
+ raise Avro::SchemaParseError, fail_msg
376
+ end
377
+ super(:enum, name, space, names, doc)
378
+ @symbols = symbols
379
+ end
380
+
381
+ def to_avro(_names=Set.new)
382
+ avro = super
383
+ avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
384
+ end
385
+ end
386
+
387
+ # Valid primitive types are in PRIMITIVE_TYPES.
388
+ class PrimitiveSchema < Schema
389
+ def initialize(type, logical_type=nil)
390
+ if PRIMITIVE_TYPES_SYM.include?(type)
391
+ super(type, logical_type)
392
+ elsif PRIMITIVE_TYPES.include?(type)
393
+ super(type.to_sym, logical_type)
394
+ else
395
+ raise AvroError.new("#{type} is not a valid primitive type.")
396
+ end
397
+ end
398
+
399
+ def to_avro(names=nil)
400
+ hsh = super
401
+ hsh.size == 1 ? type : hsh
402
+ end
403
+ end
404
+
405
+ class BytesSchema < PrimitiveSchema
406
+ attr_reader :precision, :scale
407
+ def initialize(type, logical_type=nil, precision=nil, scale=nil)
408
+ super(type.to_sym, logical_type)
409
+ @precision = precision
410
+ @scale = scale
411
+ end
412
+
413
+ def to_avro(names=nil)
414
+ avro = super
415
+ return avro if avro.is_a?(String)
416
+
417
+ avro['precision'] = precision if precision
418
+ avro['scale'] = scale if scale
419
+ avro
420
+ end
421
+ end
422
+
423
+ class FixedSchema < NamedSchema
424
+ attr_reader :size
425
+ def initialize(name, space, size, names=nil, logical_type=nil)
426
+ # Ensure valid ctor args
427
+ unless size.is_a?(Integer)
428
+ raise AvroError, 'Fixed Schema requires a valid integer for size property.'
429
+ end
430
+ super(:fixed, name, space, names, nil, logical_type)
431
+ @size = size
432
+ end
433
+
434
+ def to_avro(names=Set.new)
435
+ avro = super
436
+ avro.is_a?(Hash) ? avro.merge('size' => size) : avro
437
+ end
438
+ end
439
+
440
+ class Field < Schema
441
+ attr_reader :type, :name, :default, :order, :doc
442
+
443
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
444
+ @type = subparse(type, names, namespace)
445
+ @name = name
446
+ @default = default
447
+ @order = order
448
+ @doc = doc
449
+ validate_default! if default? && !Avro.disable_field_default_validation
450
+ end
451
+
452
+ def default?
453
+ @default != :no_default
454
+ end
455
+
456
+ def to_avro(names=Set.new)
457
+ {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
458
+ avro['default'] = default if default?
459
+ avro['order'] = order if order
460
+ avro['doc'] = doc if doc
461
+ end
462
+ end
463
+
464
+ private
465
+
466
+ def validate_default!
467
+ type_for_default = if type.type_sym == :union
468
+ type.schemas.first
469
+ else
470
+ type
471
+ end
472
+
473
+ Avro::SchemaValidator.validate!(type_for_default, default)
474
+ rescue Avro::SchemaValidator::ValidationError => e
475
+ raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}"
476
+ end
477
+ end
478
+ end
479
+
480
+ class SchemaParseError < AvroError; end
481
+
482
+ class UnknownSchemaError < SchemaParseError
483
+ attr_reader :type_name
484
+
485
+ def initialize(type)
486
+ @type_name = type
487
+ super("#{type.inspect} is not a schema we know about.")
488
+ end
489
+ end
490
+
491
+ module Name
492
+ def self.extract_namespace(name, namespace)
493
+ parts = name.split('.')
494
+ if parts.size > 1
495
+ namespace, name = parts[0..-2].join('.'), parts.last
496
+ end
497
+ return name, namespace
498
+ end
499
+
500
+ # Add a new schema object to the names dictionary (in place).
501
+ def self.add_name(names, new_schema)
502
+ new_fullname = new_schema.fullname
503
+ if Avro::Schema::VALID_TYPES.include?(new_fullname)
504
+ raise SchemaParseError, "#{new_fullname} is a reserved type name."
505
+ elsif names.nil?
506
+ names = {}
507
+ elsif names.has_key?(new_fullname)
508
+ raise SchemaParseError, "The name \"#{new_fullname}\" is already in use."
509
+ end
510
+
511
+ names[new_fullname] = new_schema
512
+ names
513
+ end
514
+
515
+ def self.make_fullname(name, namespace)
516
+ if !name.include?('.') && !namespace.nil?
517
+ namespace + '.' + name
518
+ else
519
+ name
520
+ end
521
+ end
522
+ end
523
+ end
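A short sketch of the Schema class above: parsing a record schema, validating datums (Avro::Schema.validate delegates to SchemaValidator, defined elsewhere in the gem), and computing fingerprints. Field names and values are illustrative.

    require 'avro'

    schema = Avro::Schema.parse(<<-JSON)
    {
      "type": "record",
      "name": "User",
      "namespace": "example",
      "fields": [
        {"name": "name", "type": "string"},
        {"name": "age",  "type": "int"}
      ]
    }
    JSON

    schema.type_sym   # => :record
    schema.fullname   # => "example.User"
    Avro::Schema.validate(schema, 'name' => 'Ada', 'age' => 36)   # => true
    Avro::Schema.validate(schema, 'name' => 'Ada', 'age' => 'x')  # => false

    # Single-object encoding header: the [0xC3, 0x01] magic bytes followed by the
    # 8-byte CRC-64-AVRO fingerprint of the canonical parsing form.
    schema.single_object_encoding_header.first(2)  # => [195, 1]
    schema.sha256_fingerprint                      # => Integer form of the SHA-256 parsing-form digest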
data/lib/avro/schema_compatibility.rb ADDED
@@ -0,0 +1,170 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ module Avro
17
+ module SchemaCompatibility
18
+ # Perform a full, recursive check that a datum written using the writers_schema
19
+ # can be read using the readers_schema.
20
+ def self.can_read?(writers_schema, readers_schema)
21
+ Checker.new.can_read?(writers_schema, readers_schema)