checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
---
2
2
SHA256:
3
- metadata.gz: 329e17b63dd792550707d01ce3a310a8bc81d2b04c5d2a6b5efc983a1b0a1eff
4
- data.tar.gz: 1a0d14ccb1326181e9c929de5ce1f8e057c698c3b10ee88eab1a9685b6bf0c39
3
+ metadata.gz: 40f6c6a9c5d3225fe8b8c293e05306e8d1776c3648f82b4f7f8387310a55ec2d
4
+ data.tar.gz: ed8a2164e15d5d9f74672434cda4ee93975bfd8f80cb65b295451509bf25a6f9
5
5
SHA512:
6
- metadata.gz: 88a8389db13b3ed17a82772d9a66378c6184133636d53617e5550d6cdd27cd542c3a5ca1320faf94c3b1a2a20441b119e1a9b915ecd268802e0a993711149fcc
7
- data.tar.gz: 8ac49deb0e1b0c3a813f2648d7d9a55e6135b883d6f9c73c7d962c37f5af364d79284c58e5069555bef433c84e4e91d71904ca3b695aa28daef2e503ee23625f
6
+ metadata.gz: 345b32732cb544585a3a12c7c6107ab8011c64ea1938043c82c74b66a0f2a12c96c5911c1533e6819c6d6809ddc73c95801c94faaf6f4dea0ddcf0fa58ab2594
7
+ data.tar.gz: e250dad59f16ae47d3866b0b5b9b9254d1441e2ce0c30fe8f689e83f1c07136b364f9a8c84b8e257c942f00c58400bdb5283aed99695036a22e0f9604a04194f
data/lib/http_crawler/client.rb CHANGED
@@ -9,8 +9,8 @@ module HttpCrawler
9
9
# web_name = "biquge_duquanben"
10
10
# 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
11
11
#
12
- def for(web_name)
13
- "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new()
12
+ def for(web_name, args = {})
13
+ "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(args)
14
14
end
15
15
16
16
#
@@ -18,8 +18,8 @@ module HttpCrawler
18
18
# module_name = "HttpCrawler::Web::BiqugeDuquanben"
19
19
# 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
20
20
#
21
- def for_module(module_name, *args)
22
- "#{module_name}::Client".constantize.new()
21
+ def for_module(module_name, args = {})
22
+ "#{module_name}::Client".constantize.new(args)
23
23
end
24
24
25
25
def for_uri(path)
@@ -54,6 +54,7 @@ module HttpCrawler
54
54
# 初始化一些 client 自定义参数
55
55
init_client
56
56
57
+ self.redirect = true
57
58
# 初始化 代理参数
58
59
@proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
59
60
end
@@ -109,6 +110,8 @@ module HttpCrawler
109
110
end
110
111
end
111
112
113
+ attr_accessor :redirect
114
+
112
115
attr_accessor :header
113
116
# 头文件相关方法
114
117
def header(parameter = {})
@@ -147,6 +150,11 @@ module HttpCrawler
147
150
148
151
def update_cookies(parameter = {})
149
152
parameter = parameter.symbolize_keys
153
+
154
+ @response.cookies.each do |cookie|
155
+ @cookies.add(cookie)
156
+ end unless @response.blank?
157
+
150
158
nil
151
159
end
152
160
@@ -244,10 +252,19 @@ module HttpCrawler
244
252
nil
245
253
end
246
254
247
- # 初始化http请求前置条件
248
- def http
255
+ # 创建时间: 2019/9/11 17:11
256
+ # 更新时间: 2019/9/11
257
+ # 作者: Jagger
258
+ # 方法名称: init_http
259
+ # 方法说明: 初始化http请求前置条件
260
+ # 调用方式: #init_http
261
+ #
262
+ # @return HTTP
263
+ #
264
+ def init_http
265
+ h = HTTP
249
266
# 自动重定向。最大重定向次数 max_hops: 5
250
- h = HTTP.follow(max_hops: 5)
267
+ h = h.follow(max_hops: 5) if self.redirect == true
251
268
252
269
# 添加代理
253
270
h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
@@ -270,6 +287,11 @@ module HttpCrawler
270
287
h
271
288
end
272
289
290
+ # 初始化http请求前置条件
291
+ def http
292
+ init_http
293
+ end
294
+
273
295
274
296
# 发送 get 请求
275
297
def get(path, params = {}, limit = 3)
@@ -322,7 +344,7 @@ module HttpCrawler
322
344
n = max_error_num
323
345
begin
324
346
r = block.call
325
- if r.status.success?
347
+ if r.status.success? || (redirect == false && r.status.redirect?)
326
348
return r
327
349
else
328
350
raise "请求失败(#{r.code}):#{r.uri.to_s}"
data/lib/http_crawler/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
module HttpCrawler
2
- VERSION = "0.3.1.29"
2
+ VERSION = "0.3.1.30"
3
3
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
--- !ruby/object:Gem::Specification
2
2
name: http_crawler
3
3
version: !ruby/object:Gem::Version
4
- version: 0.3.1.29
4
+ version: 0.3.1.30
5
5
platform: ruby
6
6
authors:
7
7
- jagger
8
8
autorequire:
9
9
bindir: exe
10
10
cert_chain: []
11
- date: 2019-07-19 00:00:00.000000000 Z
11
+ date: 2019-09-11 00:00:00.000000000 Z
12
12
dependencies:
13
13
- !ruby/object:Gem::Dependency
14
14
name: rspec