RDoc::Text

# Lazily builds, per encoding, the table of typographic characters used when
# converting text to HTML.  Each entry is transcoded with encode_fallback so
# that encodings lacking the character receive the plain-ASCII stand-in.
# NOTE(review): presumably this is the value of
# RDoc::Text::TO_HTML_CHARACTERS, which to_html looks up -- confirm.
Hash.new do |table, encoding|
  table[encoding] = {
    :close_dquote => encode_fallback('”', encoding, '"'),
    :close_squote => encode_fallback('’', encoding, '\''),
    :copyright    => encode_fallback('©', encoding, '(c)'),
    :ellipsis     => encode_fallback('…', encoding, '...'),
    :em_dash      => encode_fallback('—', encoding, '---'),
    :en_dash      => encode_fallback('–', encoding, '--'),
    :open_dquote  => encode_fallback('“', encoding, '"'),
    :open_squote  => encode_fallback('‘', encoding, '\''),
    :trademark    => encode_fallback('®', encoding, '(r)'),
  }
end if Object.const_defined? :Encoding

Maps an encoding to a Hash of characters properly transcoded for that encoding.

See also encode_fallback.

Class Public methods

encode_fallback(character, encoding, fallback) Link

Transcodes character to encoding with a fallback character.

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 35
# Transcodes +character+ to +encoding+.  When the target encoding cannot
# represent +character+, +fallback+ is substituted instead.
def self.encode_fallback(character, encoding, fallback)
  character.encode(
    encoding,
    fallback: { character => fallback },
    undef:    :replace,
    replace:  fallback
  )
end

Instance Public methods

expand_tabs(text) Link

Expands tab characters in text to eight spaces

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 43
# Expands tab characters in +text+ to spaces, using tab stops every eight
# columns.  Returns a new String; +text+ itself is not modified.
#
# Each line is processed independently.  A tab is expanded when it is
# preceded, counting from the start of the line, by a whole number of
# 8-character groups followed by at most seven characters that are not tab,
# carriage return or newline.  A tab the pattern cannot reach (for example
# the one in "a\r\tb") is left in place rather than retried forever.
def expand_tabs text
  expanded = []

  text.each_line do |line|
    loop do
      replaced = line.gsub!(/^((?:.{8})*?)([^\t\r\n]{0,7})\t/) do
        # pad the partial group ($2) out to the next multiple of eight
        r = "#{$1}#{$2}#{' ' * (8 - $2.size)}"
        r.force_encoding text.encoding if Object.const_defined? :Encoding
        r
      end

      # gsub! returns nil when nothing matched: either no tabs remain or the
      # remaining ones cannot be matched, so stop instead of spinning.
      break unless replaced
    end

    expanded << line
  end

  expanded.join
end

flush_left(text) Link

Flushes text left by removing, from every line, the indentation of the least-indented non-blank line

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 62
# Shifts +text+ to the left by the smallest indentation found on any line
# that contains a non-whitespace character.  Up to that many leading spaces
# are removed from every line.
def flush_left(text)
  # index of the first non-whitespace character per line; blank lines count
  # as 9999 so they never determine the indentation
  depths = text.each_line.map { |line| line.index(/\S/) || 9999 }
  indent = [9999, *depths].min

  replacement = ''
  replacement.force_encoding text.encoding if Object.const_defined? :Encoding

  text.gsub(/^ {0,#{indent}}/, replacement)
end

markup(text) Link

Convert a string in markup format into HTML.

Requires the including class to implement formatter

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 81
# Parses +text+ and has the resulting document accept the object returned
# by +formatter+, which the including class must provide.  Returns whatever
# the document's +accept+ returns.
def markup(text)
  parse(text).accept(formatter)
end

normalize_comment(text) Link

Strips hashes, expands tabs, flushes text to the left, then strips leading and trailing newlines

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 90
# Cleans up comment +text+ by applying, in order: strip_hashes, expand_tabs,
# flush_left and strip_newlines.  Empty text is returned as-is.
def normalize_comment(text)
  return text if text.empty?

  without_hashes = strip_hashes(text)
  without_tabs   = expand_tabs(without_hashes)
  flushed        = flush_left(without_tabs)

  strip_newlines(flushed)
end

parse(text) Link

Normalizes text then builds a RDoc::Markup::Document from it

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 103
  # Normalizes +text+ and parses it with RDoc::Markup::Parser.
  #
  # A +text+ that already is an RDoc::Markup::Document is returned
  # unchanged, and text consisting only of newlines after normalization
  # yields a new, empty document.  When the parser raises
  # RDoc::Markup::Parser::Error, a diagnostic report is written to $stderr
  # and the error is re-raised.
  def parse(text)
    return text if RDoc::Markup::Document === text

    text = normalize_comment(text)

    if text =~ /\A\n*\z/
      RDoc::Markup::Document.new
    else
      RDoc::Markup::Parser.parse(text)
    end
  rescue RDoc::Markup::Parser::Error => e
    report = <<-EOF
While parsing markup, RDoc encountered a #{e.class}:

#{e}
\tfrom #{e.backtrace.join "\n\tfrom "}

---8<---
#{text}
---8<---

RDoc #{RDoc::VERSION}

Ruby #{RUBY_VERSION}-p#{RUBY_PATCHLEVEL} #{RUBY_RELEASE_DATE}

Please file a bug report with the above information at:

https://github.com/rdoc/rdoc/issues

    EOF

    $stderr.puts report
    raise
  end

strip_hashes(text) Link

Strips leading # characters from text

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 137
def strip_hashes text
  return text if text =~ /^(?>\s*)[^\#]/

  empty = ''
  empty.force_encoding text.encoding if Object.const_defined? :Encoding

  text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }.gsub(/^\s+$/, empty)
end

strip_newlines(text) Link

Strips leading and trailing \n characters from text

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 149
# Returns a copy of +text+ without its leading and trailing newline
# characters.  Newlines inside the text are kept.
def strip_newlines(text)
  # the block form is used so the result keeps the String's encoding
  text.sub(/\A\n*(.*?)\n*\z/m) { Regexp.last_match(1) }
end

strip_stars(text) Link

Strips /* */ style comments

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 156
# Strips /* */ style comment decoration from +text+ and returns the result
# as a new String.
#
# Any "Document-method: name" directive is removed first.  Then the first
# comment opener (a slash followed by stars), the first comment closer
# (stars followed by a slash) and the leading star on every line are each
# overwritten with the same number of spaces, so the remaining text keeps
# its columns.  Lines left holding only whitespace are emptied.
def strip_stars text
  encoding = text.encoding if Object.const_defined? :Encoding

  # gsub returns a copy, so the in-place edits below never touch the
  # caller's String
  text = text.gsub(/Document-method:\s+[\w:.#=!?]+/, '')

  space = ' '
  space.force_encoding encoding if encoding

  text.sub!(%r{/\*+})       { space * $&.length } # comment opener
  text.sub!(%r{\*+/})       { space * $&.length } # comment closer
  text.gsub!(/^[ \t]*\*/m)  { space * $&.length } # per-line leading star

  empty = ''
  empty.force_encoding encoding if encoding
  text.gsub(/^\s+$/, empty)
end

to_html(text) Link

Converts ampersand, dashes, ellipsis, quotes, copyright and registered trademark symbols in text to properly encoded characters.

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 177
# Converts dashes, ellipses, quotes and the (c) and (r) markers in +text+
# into typographic characters and returns the result as a new String.
#
# When Encoding is available the replacement characters are looked up in
# RDoc::Text::TO_HTML_CHARACTERS for the encoding of +text+; otherwise a
# literal table is used.
#
# Content between <tt> and </tt> and anything that looks like an HTML tag is
# copied without conversion.  A backslash in front of a non-space character
# (an unhandled suppressed cross-reference) is dropped and the character
# kept.
def to_html text
  if Object.const_defined? :Encoding then
    html = ''.encode text.encoding

    encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]
  else
    html = ''
    encoded = {
      :close_dquote => '”',
      :close_squote => '’',
      :copyright    => '©',
      :ellipsis     => '…',
      :em_dash      => '—',
      :en_dash      => '–',
      :open_dquote  => '“',
      :open_squote  => '‘',
      :trademark    => '®',
    }
  end

  s = StringScanner.new text
  insquotes = false  # inside a pair of single quotes
  indquotes = false  # inside a pair of double quotes
  after_word = nil   # truthy when the previously copied run ended in \w

  until s.eos? do
    case
    when s.scan(/<tt>.*?<\/tt>/) then # skip contents of tt
      # collapse an escaped backslash (two characters) into a single one
      html << s.matched.gsub('\\\\', '\\')
    when s.scan(/<tt>.*?/) then
      warn 'mismatched <tt> tag' # TODO signal file/line
      html << s.matched
    # NOTE(review): "s*" below matches literal "s" characters; "\s*" may
    # have been intended -- confirm before changing.
    when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags
      html << s.matched
    when s.scan(/\\(\S)/) then # unhandled suppressed crossref
      html << s[1]
      after_word = nil
    when s.scan(/\.\.\.(\.?)/) then
      # a fourth dot, when present, is emitted before the ellipsis
      html << s[1] << encoded[:ellipsis]
      after_word = nil
    when s.scan(/\(c\)/) then
      html << encoded[:copyright]
      after_word = nil
    when s.scan(/\(r\)/) then
      html << encoded[:trademark]
      after_word = nil
    when s.scan(/---/) then
      html << encoded[:em_dash]
      after_word = nil
    when s.scan(/--/) then
      html << encoded[:en_dash]
      after_word = nil
    when s.scan(/&quot;|"/) then
      html << encoded[indquotes ? :close_dquote : :open_dquote]
      indquotes = !indquotes
      after_word = nil
    when s.scan(/``/) then # backtick double quote
      html << encoded[:open_dquote]
      after_word = nil
    when s.scan(/''/) then # tick double quote
      html << encoded[:close_dquote]
      after_word = nil
    when s.scan(/'/) then # single quote
      if insquotes
        html << encoded[:close_squote]
        insquotes = false
      elsif after_word
        # Mary's dog, my parents' house: do not start paired quotes
        html << encoded[:close_squote]
      else
        html << encoded[:open_squote]
        insquotes = true
      end

      after_word = nil
    else # advance to the next potentially significant character
      # the lookahead must include the backslash, otherwise the suppressed
      # crossref branch above is skipped over
      match = s.scan(/.+?(?=[<\\.("'`&-])/) #"

      if match then
        html << match
        after_word = match =~ /\w$/
      else
        # nothing significant ahead: copy the remainder verbatim
        html << s.rest
        break
      end
    end
  end

  html
end

wrap(txt, line_len = 76) Link

Wraps txt to line_len

Source: show | on GitHub

# File ../ruby/lib/rdoc/text.rb, line 271
# Breaks +txt+ into lines at spaces, aiming for lines shorter than
# +line_len+ characters, and returns them joined with newlines.  A run of
# non-space characters too long to fit is kept whole on its own line.
def wrap(txt, line_len = 76)
  pieces = []
  start  = 0
  finish = txt.length

  while start < finish
    cut = start + line_len - 1

    if cut >= finish
      # the remainder fits on one line
      cut = finish
    else
      # walk backwards looking for a space to break at
      cut -= 1 while cut > start && txt[cut] != ?\s

      if cut <= start
        # no space in the window: walk forward to the next one instead
        cut = start + line_len
        cut += 1 while cut < finish && txt[cut] != ?\s
      end
    end

    pieces << txt[start...cut] << "\n"

    # the next line begins after the spaces that follow the break
    start = cut
    start += 1 while start < finish && txt[start] == ?\s
  end

  pieces.join.strip
end

Module RDoc::Text