require 'json'
require 'mechanize'
require 'pry'
 3 
 4 module Runner
 5   URL_PREFIX = "http://d.360buy.com/area/get?fid="
 6   CHILDREN_NODE_CONTAINER = []
 7   MAX_HEIGHT = 3
 8   AGENT = Mechanize.new
 9 
10   def self.get_page_until_succeed(url)
11     begin
12       puts "GET #{url.inspect}"
13       return AGENT.get(url)
14     rescue => e
15       puts e.inspect
16       puts "Request failed. Retrying..."
17       get_page_until_succeed(url)
18     end
19   end
20 
21   def self.get_page_then_parse_until_succeed(url)
22     page = get_page_until_succeed(url)
23 
24     begin
25       parse_result = JSON.parse(page.body)
26       puts "Parse result: #{parse_result.inspect}"
27       return parse_result
28     rescue => e
29       puts e.inspect
30       puts "Parse failed. Retrying..."
31       get_page_then_parse_until_succeed(url)
32     end
33   end
34 
35   def self.find_and_save_all_children(parent, parent_height)
36     puts "Parent: #{parent}"
37     parse_result = get_page_then_parse_until_succeed("#{URL_PREFIX}#{parent['id']}")
38     if parse_result.class == Array and not parse_result.empty?
39       puts "All children of parent found."
40       CHILDREN_NODE_CONTAINER.concat(parse_result.map { |child| child["parent_id"] = parent["id"] })
41 
42       children_height = parent_height + 1
43       if (children_height >= 0) and (children_height < MAX_HEIGHT)
44         parse_result.each { |child| find_and_save_all_children(child, children_height) }
45       end
46     end
47   end
48 
49   def self.run
50     puts "---------START-----------"
51     root_node_container = JSON.parse(File.open("./root_nodes.json").read())
52     if MAX_HEIGHT > 0
53       root_node_container.each { |root_node| find_and_save_all_children(root_node, 0) }
54     end
55 
56     File.open("./jd_areas.json", "w") do |file|
57       file.write JSON.generate(CHILDREN_NODE_CONTAINER)
58     end
59     puts "----------END-----------"
60   end
61 end
62 
# Start the crawl as soon as this file is loaded.
Runner.run