Skip to content

Commit f602681

Browse files
committed
Implement OpenGraph library
1 parent 7068465 commit f602681

14 files changed

+328
-21
lines changed

Gemfile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ source "http://rubygems.org"
66
# Add dependencies to develop your gem here.
77
# Include everything needed to run rake, tests, features, etc.
88
group :development do
9-
gem "rspec", "~> 2.8.0"
10-
gem "rdoc", "~> 3.12"
11-
gem "bundler", "~> 1.0.0"
12-
gem "jeweler", "~> 1.8.4"
13-
gem "rcov", ">= 0"
9+
gem "rspec"
10+
gem "rdoc"
11+
gem "bundler"
12+
gem "jeweler"
13+
gem "nokogiri"
1414
end

Gemfile.lock

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
GEM
2+
remote: http://rubygems.org/
3+
specs:
4+
diff-lcs (1.1.3)
5+
git (1.2.5)
6+
jeweler (1.8.4)
7+
bundler (~> 1.0)
8+
git (>= 1.2.5)
9+
rake
10+
rdoc
11+
json (1.7.5)
12+
nokogiri (1.5.5)
13+
rake (0.9.2.2)
14+
rdoc (3.12)
15+
json (~> 1.4)
16+
rspec (2.11.0)
17+
rspec-core (~> 2.11.0)
18+
rspec-expectations (~> 2.11.0)
19+
rspec-mocks (~> 2.11.0)
20+
rspec-core (2.11.1)
21+
rspec-expectations (2.11.3)
22+
diff-lcs (~> 1.1.3)
23+
rspec-mocks (2.11.3)
24+
25+
PLATFORMS
26+
ruby
27+
28+
DEPENDENCIES
29+
bundler
30+
jeweler
31+
nokogiri
32+
rdoc
33+
rspec

README.rdoc

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,36 @@
1-
= opengraph_parser
1+
= OpengraphParser
22

3-
Description goes here.
3+
OpengraphParser is a simple Ruby library for parsing Open Graph protocol information from a web site. Learn more about the protocol at:
4+
http://ogp.me
5+
6+
== Installation
7+
gem install opengraph_parser
8+
9+
or add to Gemfile
10+
11+
gem "opengraph_parser"
12+
13+
== Usage
14+
og = OpenGraph.new("http://ogp.me")
15+
og.title # => "Open Graph protocol"
16+
og.type # => "website"
17+
og.url # => "http://ogp.me/"
18+
og.description # => "The Open Graph protocol enables any web page to become a rich object in a social graph."
19+
og.images # => ["http://ogp.me/logo.png"]
20+
21+
You can also get other Open Graph metadata as:
22+
og.metadata # => {"og:image:type"=>"image/png", "og:image:width"=>"300", "og:image:height"=>"300"}
23+
24+
If you parse a website that doesn’t have any Open Graph metadata, the library falls back to other information on the page, according to the following rules:
25+
<title> for title
26+
<meta name="description"> for description
27+
<link rel="image_src"> or all <img> tags for images
28+
29+
You can disable this fallback lookup by passing false as the second argument to OpenGraph.new:
30+
og = OpenGraph.new("http://ogp.me", false)
431

532
== Contributing to opengraph_parser
6-
33+
734
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
835
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
936
* Fork the project.
@@ -16,4 +43,3 @@ Description goes here.
1643

1744
Copyright (c) 2012 Huy Ha. See LICENSE.txt for
1845
further details.
19-

Rakefile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ Jeweler::Tasks.new do |gem|
1717
gem.name = "opengraph_parser"
1818
gem.homepage = "http://github.com/huyha85/opengraph_parser"
1919
gem.license = "MIT"
20-
gem.summary = %Q{TODO: one-line summary of your gem}
21-
gem.description = %Q{TODO: longer description of your gem}
20+
gem.summary = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website.}
21+
gem.description = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website. It also includes a fallback solution when the website has no Open Graph information.}
2222
gem.email = "[email protected]"
23-
gem.authors = ["Huy Ha"]
23+
gem.authors = ["Huy Ha", "Duc Trinh"]
2424
# dependencies defined in Gemfile
25+
gem.files = Dir.glob('lib/**/*.rb')
2526
end
2627
Jeweler::RubygemsDotOrgTasks.new
2728

VERSION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0.1.0

lib/open_graph.rb

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
require 'nokogiri'
2+
require 'redirect_follower'
3+
4+
# Extracts Open Graph metadata (title, url, type, description, images and any
# other og:* properties) from the page found at +src+.
#
# When the page lacks Open Graph tags and fallback is enabled, the <title>
# element, the description meta tag, <link rel="image_src"> and finally all
# <img> tags are used instead.
class OpenGraph
  # og:<name> properties that map directly onto an attribute of the same name.
  SIMPLE_ATTRIBUTES = %w(title url type description).freeze

  # Matches an Open Graph property name and captures the part after "og:".
  OG_PROPERTY = /\Aog:(.+)\z/i

  # Characters that must be percent-encoded before a string is handed to
  # URI.parse. "#" and well-formed %XX sequences are deliberately left alone
  # so fragments and already-encoded URLs are not encoded a second time.
  UNSAFE_URI_CHARS = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]#%]|%(?![0-9A-Fa-f]{2})/

  attr_accessor :src, :url, :type, :title, :description, :images, :metadata, :response

  # src      - URL of the page to inspect.
  # fallback - when true (the default), fill in missing values from regular
  #            HTML elements after the Open Graph tags have been read.
  def initialize(src, fallback = true)
    @src = src
    @images = []
    @metadata = {}
    parse_opengraph
    load_fallback if fallback
    check_images_path
  end

  # Fetches the page (following redirects) and reads every og:* meta tag.
  #
  # If fetching fails for any reason, title and url are both set to src and
  # nothing else is populated.
  def parse_opengraph
    begin
      @response = RedirectFollower.new(@src).resolve
    rescue
      # Best effort by design: an unreachable or invalid src still yields an
      # object whose title and url echo the input.
      @title = @url = @src
      return
    end

    doc = html_document
    return unless doc

    doc.css('meta').each do |meta|
      property = meta['property'].to_s.strip
      name = property[OG_PROPERTY, 1]
      next unless name

      content = meta['content'].to_s.strip
      # The property match is case-insensitive, so compare names the same way.
      key = name.downcase

      if SIMPLE_ATTRIBUTES.include?(key)
        instance_variable_set("@#{key}", content) unless content.empty?
      elsif key == 'image'
        add_image(content)
      else
        @metadata[property] = content
      end
    end
  end

  # Fills in title, url, description and images from regular HTML elements
  # for every value the Open Graph tags left empty.
  def load_fallback
    doc = html_document
    return unless doc

    if @title.to_s.empty?
      title_node = doc.at_xpath('//head/title')
      @title = title_node.text.to_s.strip if title_node
    end

    @url = @src if @url.to_s.empty?

    if @description.to_s.empty?
      description_meta = doc.at_xpath("//head/meta[@name='description']")
      @description = description_meta['content'].to_s.strip if description_meta
    end

    fetch_images(doc, "//head/link[@rel='image_src']", 'href') if @images.empty?
    fetch_images(doc, '//img', 'src') if @images.empty?
  end

  # Turns relative image references into absolute URLs on the host of src.
  #
  # Root-relative paths ("/a.jpg") are placed under scheme://host:port, other
  # relative paths ("a.jpg") are placed in the directory of src's path.
  # References that already carry a host or a scheme are kept untouched, and
  # nothing is changed when src itself has no host to resolve against.
  def check_images_path
    base = parse_uri(@src)
    return if base.nil? || base.host.nil?

    prefix = "#{base.scheme}://#{base.host}:#{base.port}"
    # Everything up to and including the last "/" of the page path.
    base_dir = base.path.to_s[%r{\A.*/}] || '/'

    candidates = @images.dup
    @images = []
    candidates.each do |img|
      add_image(absolute_image_url(img, prefix, base_dir))
    end
  end

  private

  # Appends image_url unless it is blank or already present.
  def add_image(image_url)
    @images << image_url unless @images.include?(image_url) || image_url.to_s.empty?
  end

  # Collects the given attribute of every node matching xpath_str as an image.
  def fetch_images(doc, xpath_str, attr)
    doc.xpath(xpath_str).each do |node|
      add_image(node[attr].to_s.strip)
    end
  end

  # Parses the response body as HTML; returns nil when there is no body.
  # The HTML parser is requested explicitly so pages that do not begin with
  # an <html> tag or doctype are not treated as XML.
  def html_document
    body = @response && @response.body
    return nil unless body

    Nokogiri::HTML(body)
  end

  # Returns img as an absolute URL, or img itself when it is already
  # absolute, has its own scheme (e.g. data:), or cannot be parsed.
  def absolute_image_url(img, prefix, base_dir)
    reference = parse_uri(img)
    return img if reference.nil? || reference.host || reference.scheme

    img.start_with?('/') ? "#{prefix}#{img}" : "#{prefix}#{base_dir}#{img}"
  end

  # Parses str as a URI after escaping unsafe characters; nil when invalid.
  def parse_uri(str)
    URI.parse(escape_uri(str))
  rescue URI::InvalidURIError
    nil
  end

  # Percent-encodes every byte of each unsafe character in str.
  def escape_uri(str)
    str.to_s.gsub(UNSAFE_URI_CHARS) do |chunk|
      chunk.bytes.map { |byte| format('%%%02X', byte) }.join
    end
  end
end

lib/opengraph_parser.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
require 'open_graph'

lib/redirect_follower.rb

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
require 'net/http'
2+
3+
# Fetches a URL with Net::HTTP and follows redirects until a non-redirect
# response arrives or the redirect limit is used up.
class RedirectFollower
  # Raised when more redirects are encountered than the limit allows.
  class TooManyRedirects < StandardError; end

  # Characters that must be percent-encoded before a string is handed to
  # URI.parse. "#" and well-formed %XX sequences are deliberately left alone
  # so fragments and already-encoded Location values are not encoded twice.
  UNSAFE_URL_CHARS = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]#%]|%(?![0-9A-Fa-f]{2})/

  # First quoted href of an anchor tag in a redirect page's body.
  BODY_LINK = /<a\s[^>]*?href="([^"]+)"/i

  attr_accessor :url, :body, :redirect_limit, :response

  # url   - address to fetch.
  # limit - number of redirects that may be followed (default 5).
  def initialize(url, limit = 5)
    @url, @redirect_limit = url, limit
  end

  # Requests url and keeps following redirects, updating url, response and
  # redirect_limit along the way. Following stops early when a redirect
  # response offers no target at all.
  #
  # Returns self, with body set to the body of the last response.
  # Raises TooManyRedirects once redirect_limit has dropped below zero.
  def resolve
    loop do
      raise TooManyRedirects if redirect_limit < 0

      self.response = Net::HTTP.get_response(URI.parse(escape_url(url)))
      break unless response.kind_of?(Net::HTTPRedirection)

      target = redirect_url
      # A redirect status without any target cannot be followed; hand back
      # what we have.
      break if target.nil?

      self.url = absolute_url(target)
      self.redirect_limit -= 1
    end

    self.body = response.body
    self
  end

  # Returns the redirect target announced by the current response: the
  # Location header when present, otherwise the first link found in the
  # body. Returns nil when neither exists.
  def redirect_url
    location = response['location']
    return location unless location.to_s.strip.empty?

    response.body.to_s[BODY_LINK, 1]
  end

  private

  # Resolves a possibly relative redirect target against the current url.
  # Falls back to the raw target when either side cannot be parsed.
  def absolute_url(target)
    URI.join(escape_url(url), escape_url(target)).to_s
  rescue URI::Error
    target
  end

  # Percent-encodes every byte of each unsafe character in str.
  def escape_url(str)
    str.to_s.gsub(UNSAFE_URL_CHARS) do |chunk|
      chunk.bytes.map { |byte| format('%%%02X', byte) }.join
    end
  end
end

spec/lib/open_graph_spec.rb

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2+
3+
describe OpenGraph do
  # Reads a fixture page from spec/view and stubs RedirectFollower so that
  # resolving any URL yields a response whose body is that page.
  def serve_fixture(file_name)
    html = File.read("#{File.dirname(__FILE__)}/../view/#{file_name}")
    page = double(:body => html)
    RedirectFollower.stub(:new).and_return(double(:resolve => page))
  end

  describe "#initialize" do
    context "with invalid src" do
      it "should set title and url the same as src" do
        graph = OpenGraph.new("invalid")

        graph.src.should == "invalid"
        graph.title.should == "invalid"
        graph.url.should == "invalid"
      end
    end

    context "with no fallback" do
      it "should get values from opengraph metadata" do
        serve_fixture("opengraph.html")

        graph = OpenGraph.new("http://test.host", false)

        graph.src.should == "http://test.host"
        graph.title.should == "OpenGraph Title"
        graph.type.should == "article"
        graph.url.should == "http://test.host"
        graph.description.should == "My OpenGraph sample site for Rspec"
        graph.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"]
      end
    end

    context "with fallback" do
      context "when website has opengraph metadata" do
        it "should get values from opengraph metadata" do
          serve_fixture("opengraph.html")

          graph = OpenGraph.new("http://test.host")

          graph.src.should == "http://test.host"
          graph.title.should == "OpenGraph Title"
          graph.type.should == "article"
          graph.url.should == "http://test.host"
          graph.description.should == "My OpenGraph sample site for Rspec"
          graph.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"]
        end
      end

      context "when website has no opengraph metadata" do
        it "should lookup for other data from website" do
          serve_fixture("opengraph_no_metadata.html")

          graph = OpenGraph.new("http://test.host")

          graph.src.should == "http://test.host"
          graph.title.should == "OpenGraph Title Fallback"
          graph.type.should be_nil
          graph.url.should == "http://test.host"
          graph.description.should == "Short Description Fallback"
          graph.images.should == ["http://test.host:80/images/wall1.jpg", "http://test.host:80/images/wall2.jpg"]
        end
      end
    end
  end
end

spec/lib/redirect_follower_spec.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2+
3+
describe RedirectFollower do
  describe "#resolve" do
    let(:url) { "http://test.host" }

    # Final, non-redirect response.
    let(:mock_res) { double(body: "Body is here.") }

    # Redirect response without a Location header: the stubbed kind_of?
    # returns a truthy value for any class asked about, and every header
    # lookup returns nil, so the target has to come from the link in the body.
    let(:mock_redirect) {
      m = double(body: %Q{<body><a href="http://new.test.host"></a></body>}, kind_of?: Net::HTTPRedirection)
      m.stub(:[]).and_return(nil)
      m
    }

    context "with no redirection" do
      it "should return the response" do
        Net::HTTP.should_receive(:get_response).and_return(mock_res)

        res = RedirectFollower.new(url).resolve
        res.body.should == "Body is here."
        res.redirect_limit.should == 5
      end
    end

    context "with redirection" do
      it "should follow the link in redirection" do
        # The URLs are plain ASCII, so they are parsed directly; URI.escape
        # no longer exists on Ruby 3.0 and later.
        Net::HTTP.should_receive(:get_response).with(URI.parse(url)).and_return(mock_redirect)
        Net::HTTP.should_receive(:get_response).with(URI.parse("http://new.test.host")).and_return(mock_res)

        res = RedirectFollower.new(url).resolve
        res.body.should == "Body is here."
        res.redirect_limit.should == 4
      end
    end

    context "with unlimited redirection" do
      it "should raise TooManyRedirects error" do
        Net::HTTP.stub(:get_response).and_return(mock_redirect)
        lambda {
          RedirectFollower.new(url).resolve
        }.should raise_error(RedirectFollower::TooManyRedirects)
      end
    end
  end
end

0 commit comments

Comments
 (0)