Skip to content

Commit

Permalink
input/output/converter: input output pattern
Browse files Browse the repository at this point in the history
This provides a backbone for implementing #72.

Convert reverse_adoc to Coradoc::Input::HTML (fixes #100)

This commit also implements Input::{Adoc,Docx} and Output::Adoc,
based on existing code.

Everything is handled by a new Converter class and our current
CLI tools are refactored to use the new Converter class. This gives
us a solid framework for implementing #64.
  • Loading branch information
hmdne committed Aug 23, 2024
1 parent ca77435 commit b8ca0c2
Show file tree
Hide file tree
Showing 12 changed files with 338 additions and 84 deletions.
54 changes: 22 additions & 32 deletions exe/reverse_adoc
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,52 @@ require "rubygems"
require "bundler/setup"

require "coradoc/input/html"
require "coradoc/converter"
require "optparse"
require "fileutils"

# Keyword options that are splatted into the Coradoc::Converter call at the
# end of this script. The two nested option hashes are also bound to locals
# so that the OptionParser callbacks can fill them in place (the visible
# callbacks only populate input_options).
config = {
input_options: input_options = {},
input_processor: :html,
output_options: output_options = {},
output_processor: :adoc,
}
# Output path; assigned by the -o/--output callback, otherwise left nil.
destination = nil

OptionParser.new do |opts|
opts.banner = "Usage: reverse_adoc [options] <file>"
opts.on("-m", "--mathml2asciimath", "Convert MathML to AsciiMath") do |_v|
Coradoc::Input::HTML.config.mathml2asciimath = true
input_options[:mathml2asciimath] = true
end

opts.on("-oFILENAME", "--output=FILENAME", "Output file to write to") do |v|
Coradoc::Input::HTML.config.destination = File.expand_path(v)
destination = File.expand_path(v)
# puts "output goes to #{Coradoc::Input::HTML.config.destination}"
end

opts.on("-e", "--external-images", "Export images if data URI") do |_v|
Coradoc::Input::HTML.config.external_images = true
input_options[:external_images] = true
end

opts.on("-u", "--unknown_tags [pass_through, drop, bypass, raise]",
"Unknown tag handling (default: pass_through)") do |v|
Coradoc::Input::HTML.config.unknown_tags = v
input_options[:unknown_tags] = v
end

opts.on("-r", "--require RUBYMODULE", "Require additional Ruby file") do |v|
require v
end

opts.on("--track-time", "Track time spent on each step") do
Coradoc::Input::HTML.config.track_time = true
input_options[:track_time] = true
end

opts.on("--split-sections LEVEL", "Split sections up to LEVEL") do |i|
Coradoc::Input::HTML.config.split_sections = i.to_i
input_options[:split_sections] = i.to_i
end

opts.on("-v", "--version", "Version information") do |_v|
puts "reverse_adoc: v#{Coradoc::Input::HTML::VERSION}"
puts "Coradoc: v#{Coradoc::VERSION}"
exit
end

Expand All @@ -52,40 +61,21 @@ OptionParser.new do |opts|
end.parse!

if filename = ARGV.pop
input_content = IO.read(filename)
Coradoc::Input::HTML.config.sourcedir = File.dirname(File.expand_path(filename))
input_content = filename
else
if Coradoc::Input::HTML.config.external_images
if input_options[:external_images]
raise "The -e | --external-images feature cannot be used with STDIN input. Exiting."
end

input_content = ARGF.read
input_content = ARGF
end

if Coradoc::Input::HTML.config.external_images && Coradoc::Input::HTML.config.destination.nil?
if input_options[:external_images] && destination.nil?
raise "The -e | --external-images feature must be used with -o | --output. Exiting."
end

if Coradoc::Input::HTML.config.split_sections && Coradoc::Input::HTML.config.destination.nil?
if input_options[:split_sections] && destination.nil?
raise "The --split_sections feature must be used with -o | --output. Exiting."
end

# Read from STDIN
adoc_content = Coradoc::Input::HTML.convert(input_content)

# Print to STDOUT
unless Coradoc::Input::HTML.config.destination
puts adoc_content
exit
end

# Write output to Coradoc::Input::HTML.config.destination
adoc_content = {nil => adoc_content} unless adoc_content.is_a? Hash

adoc_content.each do |file, content|
destination = Coradoc::Input::HTML.config.destination
destdir = File.dirname(destination)
filename = file ? "#{destdir}/#{file}" : destination
FileUtils.mkdir_p(File.dirname(filename))
File.write(filename, content)
end
Coradoc::Converter.(input_content, destination, **config)
42 changes: 15 additions & 27 deletions exe/w2a
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,35 @@ require "bundler/setup"

require "word-to-markdown"
require "optparse"
require "coradoc/input/html"
require "coradoc"

ARGV.push("-h") if ARGV.empty?

# Keyword options that are splatted into the Coradoc::Converter call at the
# end of this script. The two nested option hashes are also bound to locals
# so that the OptionParser callbacks can fill them in place (the visible
# callbacks only populate input_options).
config = {
input_options: input_options = {},
input_processor: :docx,
output_options: output_options = {},
output_processor: :adoc,
}
# Output path; assigned by the -o/--output callback, otherwise left nil.
destination = nil

OptionParser.new do |opts|
opts.banner = "Usage: w2a [options] <file>"
opts.on("-m", "--mathml2asciimath", "Convert MathML to AsciiMath") do |_v|
Coradoc::Input::HTML.config.mathml2asciimath = true
input_options[:mathml2asciimath] = true
end

opts.on("-oFILENAME", "--output=FILENAME", "Output file to write to") do |v|
Coradoc::Input::HTML.config.destination = File.expand_path(v)
destination = File.expand_path(v)
# puts "output goes to #{Coradoc::Input::HTML.config.destination}"
end

opts.on("-e", "--external-images", "Export images if data URI") do |_v|
Coradoc::Input::HTML.config.external_images = true
input_options[:external_images] = true
end

opts.on("-v", "--version", "Version information") do |_v|
puts "reverse_adoc: v#{Coradoc::Input::HTML::VERSION}"
puts "Coradoc: v#{Coradoc::VERSION}"
puts "[dependency] WordToMarkdown: v#{WordToMarkdown::VERSION}"
if Gem.win_platform?
puts "[dependency] LibreOffice: version not available on Windows"
Expand All @@ -45,28 +53,8 @@ end.parse!
filename = ARGV.pop
raise "Please provide an input file to process. Exiting." unless filename

if Coradoc::Input::HTML.config.external_images && Coradoc::Input::HTML.config.destination.nil?
if input_options[:external_images] && destination.nil?
raise "The -e | --external-images feature must be used with -o | --output. Exiting."
end

Coradoc::Input::HTML.config.sourcedir = Dir.mktmpdir

doc = WordToMarkdown.new(filename, Coradoc::Input::HTML.config.sourcedir)
# File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
adoc_content = Coradoc::Input::HTML.convert(
Coradoc::Input::HTML.cleaner.preprocess_word_html(doc.document.html),
WordToMarkdown::REVERSE_MARKDOWN_OPTIONS,
)
# puts scrub_whitespace(doc.document.html)

# Print to STDOUT
unless Coradoc::Input::HTML.config.destination
puts adoc_content
exit
end

# Write output to Coradoc::Input::HTML.config.destination
FileUtils.mkdir_p(File.dirname(Coradoc::Input::HTML.config.destination))
File.open(Coradoc::Input::HTML.config.destination, "w") do |file|
file.write(adoc_content)
end
Coradoc::Converter.(filename, destination, **config)
13 changes: 8 additions & 5 deletions lib/coradoc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
require "pathname"

require "parslet"
require_relative "coradoc/version"
require_relative "coradoc/util"
require_relative "coradoc/parser"
require_relative "coradoc/transformer"
require_relative "coradoc/generator"
require "coradoc/version"
require "coradoc/util"
require "coradoc/parser"
require "coradoc/transformer"
require "coradoc/generator"
require "coradoc/converter"
require "coradoc/input"
require "coradoc/output"

module Coradoc
class Error < StandardError; end
Expand Down
139 changes: 139 additions & 0 deletions lib/coradoc/converter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
require "fileutils"

module Coradoc
  # Drives one conversion: reads a document from an input, runs it through an
  # input processor and then an output processor, and writes the result(s) to
  # the output.
  class Converter
    # Raised when the input processor wants a file path but none is available.
    class NoInputPathError < StandardError; end
    # Raised when several files must be written but no output path is known.
    class NoOutputPathError < StandardError; end
    # Raised when no processor can be determined for an input or an output.
    class NoProcessorError < StandardError; end

    attr_accessor :input, :output, :config

    # @param input [String, #read, nil] a path or a readable object;
    #   defaults to $stdin
    # @param output [String, #write, nil] a path or a writable object;
    #   defaults to $stdout
    # @param config [Hash] may carry :input_processor, :output_processor,
    #   :input_options and :output_options
    def initialize(input = nil, output = nil, **config)
      @input = input || $stdin
      @output = output || $stdout

      @config = {
        input_options: {},
        output_options: {},
      }.merge(config)

      yield if block_given?
    end

    # Shorthand for +new(...).convert+.
    def self.call(*args, **kwargs, &block)
      new(*args, **kwargs, &block).convert
    end

    # @return [Object, nil] the processor named by config[:input_processor]
    #   (nil when that id is not registered), or the one matching the input
    def input_processor
      if config[:input_processor]
        Input[config[:input_processor]]
      else
        Input.select_processor(input)
      end
    end

    # @return [Object, nil] the processor named by config[:output_processor]
    #   (nil when that id is not registered), or the one matching the output
    def output_processor
      if config[:output_processor]
        Output[config[:output_processor]]
      else
        Output.select_processor(output)
      end
    end

    # Runs the conversion and writes the result.
    #
    # Files opened here (because a String path was given) are always closed,
    # even when a processor raises. Streams supplied by the caller, including
    # the default $stdout, are flushed but left open.
    #
    # @param data [Object, nil] content to convert; when nil it is read
    #   from #input. When given, no input path is derived, so processors that
    #   want filenames raise NoInputPathError.
    # @return [Object] the collection of documents that was written, keyed by
    #   file name (nil key = main output)
    # @raise [NoProcessorError] when a configured processor id is unknown
    # @raise [NoInputPathError] when the processor needs a path and none exists
    # @raise [NoOutputPathError] when extra files are produced without a path
    def convert(data = nil)
      in_processor = input_processor ||
        raise(NoProcessorError,
              "unknown input processor #{config[:input_processor].inspect}")
      out_processor = output_processor ||
        raise(NoProcessorError,
              "unknown output processor #{config[:output_processor].inspect}")
      output_id = out_processor.processor_id

      source = nil
      input_path = nil
      unless data
        source = input
        owned_source = source.is_a?(String) ? File.open(source, "rb") : nil
        source = owned_source if owned_source
        begin
          data = source.read
          input_path = source.path if source.respond_to?(:path)
        ensure
          # Only close what we opened ourselves.
          owned_source&.close
        end
      end

      # Some input processors may prefer filenames
      if in_processor.respond_to?(:processor_wants_filenames)
        unless source.respond_to?(:path)
          raise NoInputPathError,
                "no input path given, but #{in_processor} wants that " \
                "form. Ensure you don't read from standard input."
        end

        data = input_path
      end

      # We may need to configure destination path.
      sink = output
      owned_sink = nil
      if sink.is_a?(String)
        FileUtils.mkdir_p(File.dirname(sink))
        sink = owned_sink = File.open(sink, "wb")
      end

      begin
        output_path = sink.path if sink.respond_to?(:path)

        input_options = config[:input_options]
        input_options = input_options.merge(destination: output_path) if output_path
        input_options = input_options.merge(sourcedir: File.dirname(input_path)) if input_path

        data = in_processor.processor_execute(data, input_options)

        # Either we have a single document, or a Hash of file names to
        # documents (where a nil key denotes the main file). Normalize to
        # the Hash form.
        data = { nil => data } unless data.is_a?(Hash)

        # Extra files are written next to the output path, so one is required.
        if !output_path && data.keys.any? { |key| !key.nil? }
          raise NoOutputPathError,
                "no output path given, while wanting to write multiple files"
        end

        data = out_processor.processor_execute(data, config[:output_options])

        if in_processor.respond_to?(:processor_postprocess)
          data = in_processor.processor_postprocess(
            data, input_options.merge(output_processor: output_id)
          )
        end

        write_results(data, sink, output_path)
      ensure
        owned_sink&.close
      end
    end

    # Registry behaviour shared by Coradoc::Input and Coradoc::Output. The
    # extending module must own an @processors Hash.
    module CommonInputOutputMethods
      # Registers +const+ under its +processor_id+.
      def define(const)
        @processors[const.processor_id] = const
      end

      # @return [Object, nil] the processor registered under +id+
      def [](id)
        @processors[id.to_sym]
      end

      # Finds the first registered processor whose +processor_match?+ accepts
      # the given file name (or the +path+ of the given object).
      #
      # @raise [Converter::NoProcessorError] when no path is available or no
      #   processor matches
      def select_processor(filename)
        filename = filename.path if filename.respond_to?(:path)
        unless filename.is_a?(String)
          raise Converter::NoProcessorError,
                "can't find a path for #{filename}. You must manually select the processor."
        end

        match = @processors.values.find { |candidate| candidate.processor_match?(filename) }
        match || raise(Converter::NoProcessorError,
                       "you must manually select the processor for #{filename}")
      end
    end

    private

    # Writes every document: the nil-keyed one goes to +sink+, every other
    # one to a file named after its key, relative to the directory of
    # +output_path+.
    def write_results(results, sink, output_path)
      results.each do |filename, content|
        if filename.nil?
          sink.write(content)
          sink.flush if sink.respond_to?(:flush)
        else
          path = "#{File.dirname(output_path)}/#{filename}"
          FileUtils.mkdir_p(File.dirname(path))
          File.open(path, "wb") { |file| file.write(content) }
        end
      end
    end
  end
end
8 changes: 7 additions & 1 deletion lib/coradoc/input.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
require_relative "../coradoc"
require "coradoc"

module Coradoc
  # Namespace and registry for input processors. The registry methods come
  # from Converter::CommonInputOutputMethods and operate on the @processors
  # Hash owned by this module.
  module Input
    extend Converter::CommonInputOutputMethods

    @processors = {}
  end
end

require "coradoc/input/adoc"
require "coradoc/input/docx"
require "coradoc/input/html"
19 changes: 19 additions & 0 deletions lib/coradoc/input/adoc.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
require "coradoc/input"

module Coradoc
  # Input processor for AsciiDoc sources. Registers itself with
  # Coradoc::Input when this file is loaded.
  module Input::Adoc
    class << self
      # @return [Symbol] the id this processor is registered under
      def processor_id
        :adoc
      end

      # @param filename [String] file name to test
      # @return [Boolean] true when the name ends with a supported extension,
      #   compared case-insensitively
      def processor_match?(filename)
        filename.downcase.end_with?(*%w[.adoc])
      end

      # Parses the input with Coradoc::Parser; the options are not used.
      def processor_execute(input, _options = {})
        Coradoc::Parser.parse(input)
      end
    end

    Coradoc::Input.define(self)
  end
end
Loading

0 comments on commit b8ca0c2

Please sign in to comment.