# frozen_string_literal: true

require 'json'
require 'mechanize'
require 'pry'

# Crawls a tree of area nodes from the remote endpoint at URL_PREFIX.
#
# Starting from the root nodes listed in ./root_nodes.json, it fetches the
# children of every node down to MAX_HEIGHT levels, tags each child with its
# parent's id and writes the accumulated children to ./jd_areas.json.
module Runner
  # Endpoint prefix; a node's "id" is appended to request its children.
  URL_PREFIX = "http://d.360buy.com/area/get?fid="
  # Accumulates every child node discovered during the crawl (mutated in place).
  CHILDREN_NODE_CONTAINER = []
  # Number of levels below the root nodes that are fetched.
  MAX_HEIGHT = 3
  # Shared HTTP agent used for all requests.
  AGENT = Mechanize.new

  # Fetches +url+, retrying whenever the request raises a StandardError.
  #
  # @param url [String] the URL to GET
  # @param max_attempts [Integer, nil] upper bound on attempts; nil (default)
  #   retries indefinitely, matching the original behaviour
  # @return [Object] whatever AGENT.get returns for the URL
  # @raise [StandardError] the last request error once max_attempts is reached
  def self.get_page_until_succeed(url, max_attempts: nil)
    attempts = 0
    begin
      attempts += 1
      puts "GET #{url.inspect}"
      AGENT.get(url)
    rescue StandardError => e
      puts e.inspect
      raise if max_attempts && attempts >= max_attempts

      puts "Request failed. Retrying..."
      # `retry` re-runs the begin block without growing the call stack,
      # unlike the previous recursive self-call.
      retry
    end
  end

  # Fetches +url+ and parses its body as JSON. If parsing fails the page is
  # fetched again and parsing is re-attempted.
  #
  # @param url [String] the URL to GET
  # @param max_attempts [Integer, nil] upper bound on fetch-and-parse rounds
  #   (also forwarded to each fetch); nil (default) retries indefinitely
  # @return [Object] the parsed JSON document
  # @raise [StandardError] the last error once max_attempts is reached
  def self.get_page_then_parse_until_succeed(url, max_attempts: nil)
    attempts = 0
    loop do
      # Fetch outside the rescue below so a fetch that has exhausted its own
      # attempts propagates instead of being retried here.
      page = get_page_until_succeed(url, max_attempts: max_attempts)
      attempts += 1

      begin
        parse_result = JSON.parse(page.body)
        puts "Parse result: #{parse_result.inspect}"
        return parse_result
      rescue StandardError => e
        puts e.inspect
        raise if max_attempts && attempts >= max_attempts

        puts "Parse failed. Retrying..."
      end
    end
  end

  # Fetches the children of +parent+, records them in CHILDREN_NODE_CONTAINER
  # with a "parent_id" key set to the parent's "id", and recurses into each
  # child while the children's height is below MAX_HEIGHT.
  #
  # Responses that are not a non-empty Array are ignored.
  #
  # @param parent [Hash] node whose "id" is appended to URL_PREFIX
  # @param parent_height [Integer] depth of +parent+ (root nodes are 0)
  # @return [void]
  def self.find_and_save_all_children(parent, parent_height)
    puts "Parent: #{parent}"
    children = get_page_then_parse_until_succeed("#{URL_PREFIX}#{parent['id']}")
    return unless children.is_a?(Array) && !children.empty?

    puts "All children of parent found."
    # Tag each child in place, then store the children themselves. The
    # previous `map { child["parent_id"] = ... }` stored the assigned parent
    # id for every child instead of the child node.
    # NOTE(review): assumes every element is a Hash — confirm against the API.
    children.each { |child| child["parent_id"] = parent["id"] }
    CHILDREN_NODE_CONTAINER.concat(children)

    children_height = parent_height + 1
    return unless children_height >= 0 && children_height < MAX_HEIGHT

    children.each { |child| find_and_save_all_children(child, children_height) }
  end

  # Entry point: reads the root nodes from ./root_nodes.json, crawls their
  # descendants and writes all collected children to ./jd_areas.json.
  #
  # @return [void]
  def self.run
    puts "---------START-----------"
    # File.read closes the handle; the previous File.open(...).read leaked it.
    root_node_container = JSON.parse(File.read("./root_nodes.json"))
    if MAX_HEIGHT > 0
      root_node_container.each { |root_node| find_and_save_all_children(root_node, 0) }
    end

    File.open("./jd_areas.json", "w") do |file|
      file.write JSON.generate(CHILDREN_NODE_CONTAINER)
    end
    puts "----------END-----------"
  end
end

Runner.run