Skip to content

Commit

Permalink
Merge branch 'NielsSteensma-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
jznadams committed Jun 24, 2020
2 parents d35a70b + b04b48b commit bccc58c
Show file tree
Hide file tree
Showing 17 changed files with 630 additions and 147 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
/spec/examples.txt
/test/tmp/
/test/version_tmp/
/node_modules/
/tmp/
.idea/*

Expand Down
3 changes: 2 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ GEM
ttfunk (1.6.2.1)

PLATFORMS
ruby
x64-mingw32

DEPENDENCIES
Expand All @@ -48,4 +49,4 @@ DEPENDENCIES
rspec (~> 3.0)

BUNDLED WITH
1.16.1
1.17.2
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ end
To return the PNG from a Rails controller you can do the following:
```
def example_controller_method
binary_png = Dhalang::Screenshot.get_from_url_as_png("https://www.google.nl")
binary_png = Dhalang::Screenshot.get_from_url_as_png("https://www.google.com")
send_data(binary_png, filename: 'screenshotofgoogle.png', type: 'image/png')
end
```

To return the JPEG from a Rails controller you can do the following:
```
def example_controller_method
binary_jpeg = Dhalang::Screenshot.get_from_url_as_jpeg("https://www.google.nl")
binary_jpeg = Dhalang::Screenshot.get_from_url_as_jpeg("https://www.google.com")
send_data(binary_jpeg, filename: 'screenshotofgoogle.jpeg', type: 'image/jpeg')
end
```
11 changes: 9 additions & 2 deletions lib/Dhalang.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
module Dhalang
require 'PDF'
require 'Screenshot'
require_relative 'PDF'
require_relative 'Screenshot'
require_relative 'Dhalang/version'
require_relative 'Dhalang/url_utils'
require_relative 'Dhalang/file_utils'
require_relative 'Dhalang/puppeteer'
require 'uri'
require 'tempfile'
require 'shellwords'
end
37 changes: 37 additions & 0 deletions lib/Dhalang/file_utils.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module Dhalang
  # Contains common logic for files.
  class FileUtils
    # Reads the file under the given filepath as a binary.
    #
    # @param [String] file_path The absolute path of the file to read.
    #
    # @return [String] The binary content under the file_path.
    def self.read_binary(file_path)
      IO.binread(file_path)
    end

    # Creates a new temp file.
    #
    # @param [String] extension The extension of the file (without the leading dot).
    # @param [String] content   The content of the file. (Optional)
    #
    # @return [Tempfile] The created temp file. When content was given, it has
    #   been written and the file position is back at the start.
    def self.create_temp_file(extension, content = nil)
      temp_file = Tempfile.new(['dhalang', ".#{extension}"])
      unless content.nil?
        temp_file.write(content)
        temp_file.rewind
      end
      temp_file
    end

    # Closes (when still open) and deletes the given file.
    #
    # @param [Tempfile, File] file The file to delete.
    def self.delete(file)
      file.close unless file.closed?
      if file.respond_to?(:unlink)
        file.unlink
      else
        # Plain File instances have no #unlink; only the class method exists.
        File.unlink(file.path)
      end
    end
  end
end
17 changes: 17 additions & 0 deletions lib/Dhalang/puppeteer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module Dhalang
  # Contains common logic for interacting with Puppeteer.
  class Puppeteer
    # Path of the node_modules folder, relative to the working directory at the
    # moment this file is loaded.
    NODE_MODULES_PATH = "#{Dir.pwd}/node_modules/".freeze
    private_constant :NODE_MODULES_PATH

    # Launches a new Node process, executing the (Puppeteer) script under the given script_path.
    #
    # The arguments are handed to node as separate argv entries, so no shell is
    # involved: characters such as `&`, `;` or spaces in the url or in a path
    # can neither break the command nor be used to inject other commands.
    #
    # @param [String] page_url            The url to pass to the goTo method of Puppeteer.
    # @param [String] script_path         The absolute path of the JS script to execute.
    # @param [String] temp_file_path      The absolute path of the temp file Puppeteer writes its output to.
    # @param [String] temp_file_extension The extension of the temp file.
    #
    # @return [Boolean, nil] The result of Kernel#system: true when node exited
    #   with status zero, false for a non-zero exit status and nil when the
    #   command could not be executed.
    def self.visit(page_url, script_path, temp_file_path, temp_file_extension)
      system(
        'node',
        script_path.to_s,
        NODE_MODULES_PATH,
        page_url.to_s,
        temp_file_path.to_s,
        temp_file_extension.to_s
      )
    end
  end
end
14 changes: 14 additions & 0 deletions lib/Dhalang/url_utils.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module Dhalang
  # Contains common logic for URLs.
  class UrlUtils
    # Raises an error if the given URL cannot be used for navigation with Puppeteer.
    #
    # A url is accepted when it is a String matching the absolute-URI pattern
    # of the RFC 2396 parser.
    #
    # @param [String] url The url to validate.
    #
    # @return [nil] When the url is valid.
    #
    # @raise [URI::InvalidURIError] When the url is not a String or is not an absolute URI.
    def self.validate(url)
      return if url.is_a?(String) && url =~ abs_uri_pattern

      raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
    end

    # Returns the absolute-URI regexp of the RFC 2396 parser.
    # The parser is instantiated explicitly instead of going through
    # URI::DEFAULT_PARSER, because the :ABS_URI entry is only available while
    # the default parser happens to be the RFC 2396 one. The lookup is lazy, so
    # this file does not depend on 'uri' being loaded before it.
    def self.abs_uri_pattern
      @abs_uri_pattern ||= URI::RFC2396_Parser.new.regexp[:ABS_URI]
    end
    private_class_method :abs_uri_pattern
  end
end
2 changes: 1 addition & 1 deletion lib/Dhalang/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Dhalang
VERSION = "0.2.0"
VERSION = "0.3.0"
end
84 changes: 35 additions & 49 deletions lib/PDF.rb
Original file line number Diff line number Diff line change
@@ -1,65 +1,51 @@
require "Dhalang/version"
require 'uri'
require 'tempfile'

module Dhalang
# Allows consumers of this library to create PDFs with Puppeteer.
class PDF
PDF_GENERATOR_JS_PATH = File.expand_path('../js/pdfgenerator.js', __FILE__)
PROJECT_PATH = Dir.pwd + '/node_modules/'

PUPPETEER_SCRIPT_PATH = File.expand_path('../js/pdf-generator.js', __FILE__).freeze
private_constant :PUPPETEER_SCRIPT_PATH

# Captures the full webpage under the given url as PDF.
#
# @param [String] url The url to get as PDF.
#
# @return [String] The PDF that was created as binary.
def self.get_from_url(url)
validate_url(url)
temporary_pdf_save_file = create_temporary_pdf_file
begin
visit_page_with_puppeteer(url, temporary_pdf_save_file.path)
binary_pdf_content = get_file_content_as_binary_string(temporary_pdf_save_file)
ensure
temporary_pdf_save_file.close unless temporary_pdf_save_file.closed?
temporary_pdf_save_file.unlink
end
return binary_pdf_content
UrlUtils.validate(url)
get(url)
end

# Captures the full HTML as PDF.
# Useful when creating dynamic content, for example invoices.
#
# @param [String] html The html to get as PDF.
#
# @return [String] The PDF that was created as binary.
def self.get_from_html(html)
html_file = create_temporary_html_file(html)
temporary_pdf_save_file = create_temporary_pdf_file
html_file = FileUtils.create_temp_file("html", html)
url = "file://" + html_file.path
begin
visit_page_with_puppeteer("file://" + html_file.path, temporary_pdf_save_file.path)
binary_pdf_content = get_file_content_as_binary_string(temporary_pdf_save_file)
binary_pdf_content = get(url)
ensure
temporary_pdf_save_file.close unless temporary_pdf_save_file.closed?
html_file.close unless html_file.closed?
temporary_pdf_save_file.unlink
html_file.unlink
FileUtils.delete(html_file)
end
return binary_pdf_content
end

private
def self.validate_url(url)
if (url !~ URI::DEFAULT_PARSER.regexp[:ABS_URI])
raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'

# Groups and executes the logic for creating a PDF of a webpage.
#
# @param [String] url The url to create a PDF for.
#
# @return [String] The PDF that was created as binary.
private_class_method def self.get(url)
temp_file = FileUtils.create_temp_file("pdf")
begin
Puppeteer.visit(url, PUPPETEER_SCRIPT_PATH, temp_file.path, "pdf")
binary_pdf_content = FileUtils.read_binary(temp_file.path)
ensure
FileUtils.delete(temp_file)
end
end

def self.create_temporary_pdf_file
Tempfile.new("pdf")
end

## Creates a temp .html file which can be browsed to by puppeteer for creating a pdf
def self.create_temporary_html_file(content)
html_file = Tempfile.new(['page', '.html'])
html_file.write(content)
html_file.rewind
return html_file
end

def self.visit_page_with_puppeteer(page_to_visit, path_to_save_pdf_to)
system("node #{PDF_GENERATOR_JS_PATH} #{page_to_visit} #{Shellwords.escape(path_to_save_pdf_to)} #{Shellwords.escape(PROJECT_PATH)}")
end

def self.get_file_content_as_binary_string(file)
IO.binread(file.path)
return binary_pdf_content
end
end
end
66 changes: 29 additions & 37 deletions lib/Screenshot.rb
Original file line number Diff line number Diff line change
@@ -1,51 +1,43 @@
require "Dhalang/version"
require 'uri'
require 'tempfile'

module Dhalang
# Allows consumers of this library to take screenshots with Puppeteer.
class Screenshot
SCREENSHOT_GENERATOR_JS_PATH = File.expand_path('../js/screenshotgenerator.js', __FILE__)
PROJECT_PATH = Dir.pwd + '/node_modules/'

PUPPETEER_SCRIPT_PATH = File.expand_path('../js/screenshot-generator.js', __FILE__).freeze
private_constant :PUPPETEER_SCRIPT_PATH

# Captures a full JPEG screenshot of the webpage under the given url.
#
# @param [String] url The url to take a screenshot of.
#
# @return [String] the screenshot that was taken as binary.
def self.get_from_url_as_jpeg(url)
validate_url(url)
get_image(url, :jpeg)
get(url, "jpeg")
end

# Captures a full PNG screenshot of the webpage under the given url.
#
# @param [String] url The url to take a screenshot of.
#
# @return [String] The screenshot that was taken as binary.
def self.get_from_url_as_png(url)
validate_url(url)
get_image(url, :png)
end

private
def self.validate_url(url)
if (url !~ URI::DEFAULT_PARSER.regexp[:ABS_URI])
raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
end
end

def self.create_temporary_screenshot_file
Tempfile.new("png")
get(url, "png")
end

def self.get_image(url, type)
temporary_screenshot_save_file = create_temporary_screenshot_file

# Groups and executes the logic for taking a screenshot of a webpage.
#
# @param [String] url The url to take a screenshot of.
# @param [String] image_type The image type to use for storing the screenshot.
#
# @return [String] The screenshot that was taken as binary.
private_class_method def self.get(url, image_type)
UrlUtils.validate(url)
temp_file = FileUtils.create_temp_file(image_type)
begin
visit_page_with_puppeteer(url, temporary_screenshot_save_file.path, type)
binary_image_content = get_file_content_as_binary_string(temporary_screenshot_save_file)
Puppeteer.visit(url, PUPPETEER_SCRIPT_PATH, temp_file.path, image_type)
binary_image_content = FileUtils.read_binary(temp_file.path)
ensure
temporary_screenshot_save_file.close unless temporary_screenshot_save_file.closed?
temporary_screenshot_save_file.unlink
FileUtils.delete(temp_file)
end
return binary_image_content
end

def self.visit_page_with_puppeteer(page_to_visit, path_to_save_pdf_to, image_save_type)
system("node #{SCREENSHOT_GENERATOR_JS_PATH} #{page_to_visit} #{Shellwords.escape(path_to_save_pdf_to)} #{Shellwords.escape(PROJECT_PATH)} #{Shellwords.escape(image_save_type)}")
end

def self.get_file_content_as_binary_string(file)
IO.binread(file.path)
end
end
end
54 changes: 54 additions & 0 deletions lib/js/dhalang.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/**
* @typedef {Object} Configuration
* @property {string} webPageUrl - The url of the webpage to visit.
* @property {string} tempFilePath - The path of the tempfile to write the screenshot/pdf to.
* @property {string} puppeteerPath - The path of the Puppeteer module.
* @property {string} imageType - The type of image to save ( undefined for pdfgenerator ).
*/

/**
* @typedef {Object} NavigationParameters
* @property {number} timeout - Maximum time in milliseconds until navigation times out; we use a default timeout of 20 seconds.
* @property {string} waitUntil - Determines when the navigation is considered finished; we wait until the Window.load event is fired (meaning all images, stylesheets, etc. have loaded).
*/

/**
* Generates a configuration object based on the given process arguments from Ruby.
* @param {Boolean} isForScreenshotGenerator - Indicates if this configuration is for a screenshot generator.
* @returns {Configuration}
* The generated configuration object.
*/
exports.getConfiguration = function (isForScreenshotGenerator) {
return {
puppeteerPath: process.argv[2],
webPageUrl: process.argv[3],
tempFilePath: process.argv[4],
imageType: isForScreenshotGenerator ? process.argv[5] : undefined
}
}

/**
* Launches Puppeteer and returns its instance.
* @param {string} puppeteerModulePath - The path puppeteer is under.
* @returns {Promise<Object>}
* The launched instance of Puppeteer.
*/
exports.launchPuppeteer = async function (puppeteerModulePath) {
module.paths.push(puppeteerModulePath);
const puppeteer = require('puppeteer');
return await puppeteer.launch({
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
}

/**
* Returns a new object containing the navigation parameters to use when navigating with Puppeteer to web pages.
* @returns {NavigationParameters}
* The navigation parameters to use.
*/
exports.getNavigationParameters = function () {
return {
timeout: 20000,
waitUntil: 'load'
}
}
Loading

0 comments on commit bccc58c

Please sign in to comment.