| Class | WWW::Mechanize::Page |
| In: |
lib/mechanize/page.rb
|
| Parent: | File |
This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.
# Usage example: fetching an HTML page through a Mechanize agent returns an
# instance of WWW::Mechanize::Page.
require 'rubygems'
require 'mechanize'
agent = WWW::Mechanize.new
agent.get('http://google.com/').class #=> WWW::Mechanize::Page
| parser | -> | root |
| bases | [R] | |
| forms | [R] | |
| frames | [R] | |
| iframes | [R] | |
| links | [R] | |
| mech | [RW] | |
| meta | [R] | |
| parser | [R] | |
| title | [R] | |
| watch_for_set | [R] | |
| watches | [R] |
# File lib/mechanize/page.rb, line 27
# Builds a Page and, when both a body and a response are given, parses the
# body so the derived collections are populated.
#
# uri, response, body and code are forwarded unchanged to the parent
# initializer. mech is stored in @mech unless @mech is already set.
#
# Raises Mechanize::ContentTypeError unless the content type starts with
# "text/html". The comparison ignores case, because media type names are
# case-insensitive, and is anchored to the start of the string. A nil
# response is reported as a ContentTypeError as well, instead of failing
# with NoMethodError while reading the header.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  super(uri, response, body, code)
  @watch_for_set ||= {}
  @mech ||= mech

  # Without a response there is no header to inspect.
  ctype = response ? content_type() : nil
  unless ctype && ctype =~ /\Atext\/html/i
    raise Mechanize::ContentTypeError.new(response && response['content-type'])
  end

  # construct parser and feed with HTML
  if body && response
    @parser ||= Hpricot.parse(body)
    parse_html
  end
end
Returns the content type taken from the response's 'content-type' header.
# File lib/mechanize/page.rb, line 43
# Get the content type.
#
# Returns the value stored under 'content-type' in @response, or nil when
# no response has been set (the constructor allows response to be nil).
def content_type
  @response ? @response['content-type'] : nil
end
# File lib/mechanize/page.rb, line 52
# Stores obj as the new watch set. When the page has a body and obj is
# truthy, the page is re-parsed so the change takes effect; otherwise
# nothing else happens.
def watch_for_set=(obj)
  @watch_for_set = obj
  return unless @body && @watch_for_set

  parse_html
end
# File lib/mechanize/page.rb, line 66
# Rebuilds every collection derived from @parser: @forms, @links, @meta,
# @frames, @iframes, @bases, @watches and @title. All of them are reset
# first, so objects created by a previous call are discarded.
#
# Reads @parser, @mech, @uri and @watch_for_set.
def parse_html
  @forms = WWW::Mechanize::List.new
  @links = WWW::Mechanize::List.new
  @meta = WWW::Mechanize::List.new
  @frames = WWW::Mechanize::List.new
  @iframes = WWW::Mechanize::List.new
  @bases = WWW::Mechanize::List.new
  @watches = {}

  # Set the title; it stays nil when the title text is empty
  title_text = (@parser / 'title').text
  @title = title_text.empty? ? nil : title_text

  # Find all 'base' tags
  (@parser / 'base').each do |node|
    @bases << Base.new(node, @mech, self)
  end

  # Find all the form tags; a form without an action targets this page's URI
  (@parser / 'form').each do |html_form|
    form = Form.new(html_form, @mech, self)
    form.action ||= @uri
    @forms << form
  end

  # Find all the 'a' tags, then all the 'area' tags (in that order)
  ['a', 'area'].each do |tag|
    (@parser / tag).each do |node|
      @links << Link.new(node, @mech, self)
    end
  end

  # Find all 'meta' refresh tags that carry a target URL
  (@parser / 'meta').each do |node|
    equiv = node['http-equiv']
    content = node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    # The URL may be bare, single-quoted or double-quoted; the surrounding
    # quotes are not part of the captured URL.
    next unless content =~ /\A\s*\d+\s*;\s*url\s*=\s*['"]?([^\s'"]+)/i

    # The extracted URL is written onto the node as 'href' before wrapping.
    node['href'] = $1
    @meta << Meta.new(node, @mech, self)
  end

  # Find all 'frame' tags
  (@parser / 'frame').each do |node|
    @frames << Frame.new(node, @mech, self)
  end

  # Find all 'iframe' tags
  (@parser / 'iframe').each do |node|
    @iframes << Frame.new(node, @mech, self)
  end

  # Find all watch tags. A nil or false watch set means nothing is watched.
  # A key only appears in @watches when at least one node matched it.
  (@watch_for_set || {}).each do |key, klass|
    (@parser / key).each do |node|
      (@watches[key] ||= []) << (klass ? klass.new(node) : node)
    end
  end
end