Skip to content

Commit

Permalink
input/html: Fix file and constant references
Browse files Browse the repository at this point in the history
  • Loading branch information
hmdne committed Aug 23, 2024
1 parent 6e8c7f8 commit ca77435
Show file tree
Hide file tree
Showing 91 changed files with 317 additions and 285 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ Coradoc::Parser.parse(sample_asciidoc)

This interface will return the abstract syntax tree.

### Converting from HTML to AsciiDoc (reverse_adoc)
### Converting from HTML to AsciiDoc (formerly reverse_adoc)

See: [reverse_adoc README](https://github.com/metanorma/coradoc/blob/main/lib/coradoc/reverse_adoc/README.adoc)
See: [Coradoc::Input::HTML README](https://github.com/metanorma/coradoc/blob/main/lib/coradoc/input/html/README.adoc)

[sandi-metz]: http://robots.thoughtbot.com/post/50655960596/sandi-metz-rules-for-developers
34 changes: 17 additions & 17 deletions exe/reverse_adoc
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,44 @@
require "rubygems"
require "bundler/setup"

require "coradoc/reverse_adoc"
require "coradoc/input/html"
require "optparse"
require "fileutils"

OptionParser.new do |opts|
opts.banner = "Usage: reverse_adoc [options] <file>"
opts.on("-m", "--mathml2asciimath", "Convert MathML to AsciiMath") do |_v|
Coradoc::ReverseAdoc.config.mathml2asciimath = true
Coradoc::Input::HTML.config.mathml2asciimath = true
end

opts.on("-oFILENAME", "--output=FILENAME", "Output file to write to") do |v|
Coradoc::ReverseAdoc.config.destination = File.expand_path(v)
# puts "output goes to #{Coradoc::ReverseAdoc.config.destination}"
Coradoc::Input::HTML.config.destination = File.expand_path(v)
# puts "output goes to #{Coradoc::Input::HTML.config.destination}"
end

opts.on("-e", "--external-images", "Export images if data URI") do |_v|
Coradoc::ReverseAdoc.config.external_images = true
Coradoc::Input::HTML.config.external_images = true
end

opts.on("-u", "--unknown_tags [pass_through, drop, bypass, raise]",
"Unknown tag handling (default: pass_through)") do |v|
Coradoc::ReverseAdoc.config.unknown_tags = v
Coradoc::Input::HTML.config.unknown_tags = v
end

opts.on("-r", "--require RUBYMODULE", "Require additional Ruby file") do |v|
require v
end

opts.on("--track-time", "Track time spent on each step") do
Coradoc::ReverseAdoc.config.track_time = true
Coradoc::Input::HTML.config.track_time = true
end

opts.on("--split-sections LEVEL", "Split sections up to LEVEL") do |i|
Coradoc::ReverseAdoc.config.split_sections = i.to_i
Coradoc::Input::HTML.config.split_sections = i.to_i
end

opts.on("-v", "--version", "Version information") do |_v|
puts "reverse_adoc: v#{Coradoc::ReverseAdoc::VERSION}"
puts "reverse_adoc: v#{Coradoc::Input::HTML::VERSION}"
exit
end

Expand All @@ -53,37 +53,37 @@ end.parse!

if filename = ARGV.pop
input_content = IO.read(filename)
Coradoc::ReverseAdoc.config.sourcedir = File.dirname(File.expand_path(filename))
Coradoc::Input::HTML.config.sourcedir = File.dirname(File.expand_path(filename))
else
if Coradoc::ReverseAdoc.config.external_images
if Coradoc::Input::HTML.config.external_images
raise "The -e | --external-images feature cannot be used with STDIN input. Exiting."
end

input_content = ARGF.read
end

if Coradoc::ReverseAdoc.config.external_images && Coradoc::ReverseAdoc.config.destination.nil?
if Coradoc::Input::HTML.config.external_images && Coradoc::Input::HTML.config.destination.nil?
raise "The -e | --external-images feature must be used with -o | --output. Exiting."
end

if Coradoc::ReverseAdoc.config.split_sections && Coradoc::ReverseAdoc.config.destination.nil?
if Coradoc::Input::HTML.config.split_sections && Coradoc::Input::HTML.config.destination.nil?
raise "The --split_sections feature must be used with -o | --output. Exiting."
end

# Read from STDIN
adoc_content = Coradoc::ReverseAdoc.convert(input_content)
adoc_content = Coradoc::Input::HTML.convert(input_content)

# Print to STDOUT
unless Coradoc::ReverseAdoc.config.destination
unless Coradoc::Input::HTML.config.destination
puts adoc_content
exit
end

# Write output to Coradoc::ReverseAdoc.config.destination
# Write output to Coradoc::Input::HTML.config.destination
adoc_content = {nil => adoc_content} unless adoc_content.is_a? Hash

adoc_content.each do |file, content|
destination = Coradoc::ReverseAdoc.config.destination
destination = Coradoc::Input::HTML.config.destination
destdir = File.dirname(destination)
filename = file ? "#{destdir}/#{file}" : destination
FileUtils.mkdir_p(File.dirname(filename))
Expand Down
30 changes: 15 additions & 15 deletions exe/w2a
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,27 @@ require "bundler/setup"

require "word-to-markdown"
require "optparse"
require "coradoc/reverse_adoc"
require "coradoc/input/html"

ARGV.push("-h") if ARGV.empty?

OptionParser.new do |opts|
opts.banner = "Usage: w2a [options] <file>"
opts.on("-m", "--mathml2asciimath", "Convert MathML to AsciiMath") do |_v|
Coradoc::ReverseAdoc.config.mathml2asciimath = true
Coradoc::Input::HTML.config.mathml2asciimath = true
end

opts.on("-oFILENAME", "--output=FILENAME", "Output file to write to") do |v|
Coradoc::ReverseAdoc.config.destination = File.expand_path(v)
# puts "output goes to #{Coradoc::ReverseAdoc.config.destination}"
Coradoc::Input::HTML.config.destination = File.expand_path(v)
# puts "output goes to #{Coradoc::Input::HTML.config.destination}"
end

opts.on("-e", "--external-images", "Export images if data URI") do |_v|
Coradoc::ReverseAdoc.config.external_images = true
Coradoc::Input::HTML.config.external_images = true
end

opts.on("-v", "--version", "Version information") do |_v|
puts "reverse_adoc: v#{Coradoc::ReverseAdoc::VERSION}"
puts "reverse_adoc: v#{Coradoc::Input::HTML::VERSION}"
puts "[dependency] WordToMarkdown: v#{WordToMarkdown::VERSION}"
if Gem.win_platform?
puts "[dependency] LibreOffice: version not available on Windows"
Expand All @@ -45,28 +45,28 @@ end.parse!
filename = ARGV.pop
raise "Please provide an input file to process. Exiting." unless filename

if Coradoc::ReverseAdoc.config.external_images && Coradoc::ReverseAdoc.config.destination.nil?
if Coradoc::Input::HTML.config.external_images && Coradoc::Input::HTML.config.destination.nil?
raise "The -e | --external-images feature must be used with -o | --output. Exiting."
end

Coradoc::ReverseAdoc.config.sourcedir = Dir.mktmpdir
Coradoc::Input::HTML.config.sourcedir = Dir.mktmpdir

doc = WordToMarkdown.new(filename, Coradoc::ReverseAdoc.config.sourcedir)
doc = WordToMarkdown.new(filename, Coradoc::Input::HTML.config.sourcedir)
# File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
adoc_content = Coradoc::ReverseAdoc.convert(
Coradoc::ReverseAdoc.cleaner.preprocess_word_html(doc.document.html),
adoc_content = Coradoc::Input::HTML.convert(
Coradoc::Input::HTML.cleaner.preprocess_word_html(doc.document.html),
WordToMarkdown::REVERSE_MARKDOWN_OPTIONS,
)
# puts scrub_whitespace(doc.document.html)

# Print to STDOUT
unless Coradoc::ReverseAdoc.config.destination
unless Coradoc::Input::HTML.config.destination
puts adoc_content
exit
end

# Write output to Coradoc::ReverseAdoc.config.destination
FileUtils.mkdir_p(File.dirname(Coradoc::ReverseAdoc.config.destination))
File.open(Coradoc::ReverseAdoc.config.destination, "w") do |file|
# Write output to Coradoc::Input::HTML.config.destination
FileUtils.mkdir_p(File.dirname(Coradoc::Input::HTML.config.destination))
File.open(Coradoc::Input::HTML.config.destination, "w") do |file|
file.write(adoc_content)
end
6 changes: 6 additions & 0 deletions lib/coradoc/input.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
require_relative "../coradoc"

module Coradoc
module Input
end
end
22 changes: 11 additions & 11 deletions lib/coradoc/input/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@

require "digest"
require "nokogiri"
require_relative "../coradoc"
require_relative "reverse_adoc/errors"
require_relative "reverse_adoc/cleaner"
require_relative "reverse_adoc/config"
require_relative "reverse_adoc/converters"
require_relative "reverse_adoc/converters/base"
require_relative "reverse_adoc/html_converter"
require_relative "reverse_adoc/plugin"
require_relative "reverse_adoc/postprocessor"
require_relative "../input"
require_relative "html/errors"
require_relative "html/cleaner"
require_relative "html/config"
require_relative "html/converters"
require_relative "html/converters/base"
require_relative "html/html_converter"
require_relative "html/plugin"
require_relative "html/postprocessor"

module Coradoc
module ReverseAdoc
module Input::HTML
def self.convert(input, options = {})
Coradoc::ReverseAdoc::HtmlConverter.convert(input, options)
Coradoc::Input::HTML::HtmlConverter.convert(input, options)
end

def self.config
Expand Down
18 changes: 9 additions & 9 deletions lib/coradoc/input/html/README.adoc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
= AsciiDoc from HTML and Microsoft Word: reverse_adoc
= AsciiDoc from HTML and Microsoft Word (formerly reverse_adoc)

== Purpose

Expand Down Expand Up @@ -253,9 +253,9 @@ Simple to use.

[source,ruby]
----
require 'coradoc/reverse_adoc'
require 'coradoc/input/html'
result = Coradoc::ReverseAdoc.convert input
result = Coradoc::Input::HTML.convert input
result.inspect # " *feelings* "
----

Expand All @@ -265,9 +265,9 @@ Just pass your chosen configuration options in after the input. The given option

[source,ruby]
----
require 'coradoc/reverse_adoc'
require 'coradoc/input/html'
Coradoc::ReverseAdoc.convert(input, unknown_tags: :raise, mathml2asciimath: true)
Coradoc::Input::HTML.convert(input, unknown_tags: :raise, mathml2asciimath: true)
----


Expand All @@ -277,9 +277,9 @@ Or configure it block style on a initializer level. These configurations will la

[source,ruby]
----
require 'coradoc/reverse_adoc'
require 'coradoc/input/html'
Coradoc::ReverseAdoc.config do |config|
Coradoc::Input::HTML.config do |config|
config.unknown_tags = :bypass
config.mathml2asciimath = true
config.tag_border = ''
Expand All @@ -290,10 +290,10 @@ end

[source,ruby]
----
require 'coradoc/reverse_adoc'
require 'coradoc/input/html'
# Options can be supplied as keyword arguments
Coradoc::ReverseAdoc::HtmlConverter.to_coradoc("<b><i>Some input</i></b>")
Coradoc::Input::HTML::HtmlConverter.to_coradoc("<b><i>Some input</i></b>")
----


Expand Down
8 changes: 4 additions & 4 deletions lib/coradoc/input/html/cleaner.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
class Cleaner
def tidy(string)
result = HtmlConverter.track_time "Removing inner whitespace" do
Expand Down Expand Up @@ -57,20 +57,20 @@ def remove_inner_whitespaces(string)
# Same for underscores and brackets.
def clean_tag_borders(string)
# result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match|
# preserve_border_whitespaces(match, default_border: Coradoc::ReverseAdoc.config.tag_border) do
# preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
# match.strip.sub("** ", "**").sub(" **", "**")
# end
# end

# result = string.gsub(/\s?_{2,}.*?_{2,}\s?/) do |match|
# preserve_border_whitespaces(match, default_border: Coradoc::ReverseAdoc.config.tag_border) do
# preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
# match.strip.sub("__ ", "__").sub(" __", "__")
# end
# end

result = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
preserve_border_whitespaces(match,
default_border: Coradoc::ReverseAdoc.config.tag_border) do
default_border: Coradoc::Input::HTML.config.tag_border) do
match.strip.sub("~~ ", "~~").sub(" ~~", "~~")
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/config.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require "tmpdir"

module Coradoc
module ReverseAdoc
module Input::HTML
class Config
def initialize
@unknown_tags = :pass_through
Expand Down
12 changes: 6 additions & 6 deletions lib/coradoc/input/html/converters.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Coradoc
module ReverseAdoc
module Input::HTML
module Converters
def self.register(tag_name, converter)
@@converters ||= {}
Expand Down Expand Up @@ -38,18 +38,18 @@ def self.process_coradoc(node, state)
end

def self.default_converter(tag_name)
case Coradoc::ReverseAdoc.config.unknown_tags.to_sym
case Coradoc::Input::HTML.config.unknown_tags.to_sym
when :pass_through
Coradoc::ReverseAdoc::Converters::PassThrough.new
Coradoc::Input::HTML::Converters::PassThrough.new
when :drop
Coradoc::ReverseAdoc::Converters::Drop.new
Coradoc::Input::HTML::Converters::Drop.new
when :bypass
Coradoc::ReverseAdoc::Converters::Bypass.new
Coradoc::Input::HTML::Converters::Bypass.new
when :raise
raise UnknownTagError, "unknown tag: #{tag_name}"
else
raise InvalidConfigurationError,
"unknown value #{Coradoc::ReverseAdoc.config.unknown_tags.inspect} for Coradoc::ReverseAdoc.config.unknown_tags"
"unknown value #{Coradoc::Input::HTML.config.unknown_tags.inspect} for Coradoc::Input::HTML.config.unknown_tags"
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/converters/a.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require "coradoc"

module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class A < Base
def to_coradoc(node, state = {})
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/converters/aside.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class Aside < Base
def to_coradoc(node, state = {})
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/converters/audio.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class Audio < Base
def to_coradoc(node, _state = {})
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/converters/base.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class Base
# Default implementation to convert a given Nokogiri node
Expand Down
4 changes: 2 additions & 2 deletions lib/coradoc/input/html/converters/blockquote.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class Blockquote < Base
def to_coradoc(node, state = {})
Expand All @@ -7,7 +7,7 @@ def to_coradoc(node, state = {})
attributes = Coradoc::Element::AttributeList.new
attributes.add_positional("quote", cite) if !cite.nil?
content = treat_children(node, state).strip
content = Coradoc::ReverseAdoc.cleaner.remove_newlines(content)
content = Coradoc::Input::HTML.cleaner.remove_newlines(content)
Coradoc::Element::Block::Quote.new(nil, lines: content,
attributes: attributes)
end
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/converters/br.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class Br < Base
def to_coradoc(_node, _state = {})
Expand Down
2 changes: 1 addition & 1 deletion lib/coradoc/input/html/converters/bypass.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Coradoc::ReverseAdoc
module Coradoc::Input::HTML
module Converters
class Bypass < Base
def to_coradoc(node, state = {})
Expand Down
Loading

0 comments on commit ca77435

Please sign in to comment.