module MaRuKu::In::Markdown::SpanLevelParser
Constants
- CharSource
-
Choose!
- EscapedCharInInlineCode
- EscapedCharInQuotes
- EscapedCharInText
- IgnoreWikiLinks
Public Instance Methods
Source
# File lib/maruku/input/parse_span.rb, line 351 def extension_meta(src, con, break_on_chars=nil) if m = src.read_regexp(/([^\s\:\"\'}]+?):/) name = m[1] al = read_attribute_list(src, con, break_on_chars) self.doc.ald[name] = al con.push md_ald(name, al) else al = read_attribute_list(src, con, break_on_chars) con.push md_ial(al) end end
Source
# File lib/maruku/input/parse_span.rb, line 330 def interpret_extension(src, con, break_on_chars=nil) case src.cur_char when ':' src.ignore_char # : extension_meta(src, con, break_on_chars) when '#', '.' extension_meta(src, con, break_on_chars) else stuff = read_simple(src, '}', break_on_chars) if stuff =~ /^(\w+\s|[^\w])/ extension_id = $1.strip maruku_recover "I don't know what to do with extension '#{extension_id}'\n" + "I will treat this:\n\t{#{stuff}} \n as meta-data.\n", src, con else maruku_recover "I will treat this:\n\t{#{stuff}} \n as meta-data.\n", src, con end extension_meta(src, con, break_on_chars) end end
Start: cursor on character after ‘{’ End: curson on ‘}’ or EOF
Source
# File lib/maruku/attributes.rb, line 28 def md_al(s = []) AttributeList.new(s) end
Source
# File lib/maruku/attributes.rb, line 91 def merge_ial(elements, src, con) # Apply each IAL to the element before (elements + [nil]).each_cons(3) do |before, e, after| next unless ial?(e) if before.kind_of? MDElement before.al = e.ial elsif after.kind_of? MDElement after.al = e.ial else maruku_error <<ERR, src, con It's unclear which element the attribute list {:#{e.ial.to_s}} is referring to. The element before is a #{before.class}, the element after is a #{after.class}. before: #{before.inspect} after: #{after.inspect} ERR end end unless Globals[:debug_keep_ials] elements.delete_if {|x| ial?(x) && x != elements.first} end end
Source
# File lib/maruku/input/parse_span.rb, line 11 def parse_span(string, parent=nil) string = Array(string).join("\n") unless string.kind_of? String src = MaRuKu::In::Markdown::SpanLevelParser::CharSource.new(string, parent) read_span(src, EscapedCharInText, [nil]) end
Source
# File lib/maruku/attributes.rb, line 33 def read_attribute_list(src, con=nil, break_on_chars=nil) break_on_chars = Array(break_on_chars) separators = break_on_chars + ['=', ' ', "\t"] escaped = Maruku::EscapedCharInQuotes al = AttributeList.new loop do src.consume_whitespace break if break_on_chars.include? src.cur_char case src.cur_char when ':' src.ignore_char when nil break # we're done here. when '=' # error src.ignore_char maruku_error "In attribute lists, cannot start identifier with `=`." tell_user "Ignoring and continuing." when '#' # id definition src.ignore_char if id = read_quoted_or_unquoted(src, con, escaped, separators) al << [:id, id] else maruku_error 'Could not read `id` attribute.', src, con tell_user 'Ignoring bad `id` attribute.' end when '.' # class definition src.ignore_char if klass = read_quoted_or_unquoted(src, con, escaped, separators) al << [:class, klass] else maruku_error 'Could not read `class` attribute.', src, con tell_user 'Ignoring bad `class` attribute.' end else unless key = read_quoted_or_unquoted(src, con, escaped, separators) maruku_error 'Could not read key or reference.' next end if src.cur_char != '=' && key.length > 0 al << [:ref, key] next end src.ignore_char # skip the = if val = read_quoted_or_unquoted(src, con, escaped, separators) al << [key, val] else maruku_error "Could not read value for key #{key.inspect}.", src, con tell_user "Ignoring key #{key.inspect}" end end end al end
@return [AttributeList, nil]
Source
# File lib/maruku/input/parse_span.rb, line 459 def read_em(src, delim) src.ignore_char children = read_span(src, EscapedCharInText, nil, delim) src.ignore_char md_em(children) end
Source
# File lib/maruku/input/parse_span.rb, line 371 def read_email_el(src,con) src.ignore_char # leading < mail = read_simple(src, nil, '>') src.ignore_char # closing > address = mail.gsub(/^mailto:/, '') con.push_element md_email(address) end
Source
# File lib/maruku/input/parse_span.rb, line 473 def read_emstrong(src, delim) src.ignore_chars(3) children = read_span(src, EscapedCharInText, nil, delim) src.ignore_chars(3) md_emstrong(children) end
Source
# File lib/maruku/input/parse_span.rb, line 490 def read_footnote_ref(src,con) ref = read_ref_id(src,con) con.push_element md_foot_ref(ref) end
Source
# File lib/maruku/input/parse_span.rb, line 624 def read_image(src, con) src.ignore_chars(2) # opening "![" alt_text = read_span(src, EscapedCharInText, ']') src.ignore_char # closing bracket # ignore space if src.cur_char == ' ' && ['[', '('].include?(src.next_char) src.ignore_char end case src.cur_char when '(' src.ignore_char # opening ( src.consume_whitespace url = read_url(src, [' ', "\t", ')']) unless url maruku_error "Could not read url from #{src.cur_chars(10).inspect}", src, con end src.consume_whitespace title = nil if src.cur_char != ')' # we have a title quote_char = src.cur_char title = read_quoted(src, con) if !title maruku_error 'Must quote title', src, con else # Tries to read a title with quotes:  # this is the most ugly thing in Markdown if !src.next_matches(/\s*\)/) # if there is not a closing par ), then read # the rest and guess it's title with quotes rest = read_simple(src, nil, ')', nil) # chop the closing char rest.chop! title << quote_char << rest end end end src.consume_whitespace closing = src.shift_char # closing ) if closing != ')' maruku_error "Unclosed link: '#{closing}'" + " Read url=#{url.inspect} title=#{title.inspect}", src, con end con.push_element md_im_image(alt_text, url, title) when '[' # link ref ref_id = read_ref_id(src, con) if !ref_id # TODO: check around maruku_error 'Reference not closed.', src, con ref_id = "" end con.push_element md_image(alt_text, ref_id) else # no stuff ref_id = alt_text.join con.push_element md_image(alt_text, ref_id) end end
Source
# File lib/maruku/input/parse_span.rb, line 521 def read_inline_code(src, con) # Count the number of ticks num_ticks = 0 while src.cur_char == '`' num_ticks += 1 src.ignore_char end # We will read until this string end_string = "`" * num_ticks # Try to handle empty single-ticks if num_ticks > 1 && !src.next_matches(/.*#{Regexp.escape(end_string)}/) con.push_element md_entity('ldquo') src.ignore_chars(2) return end code = read_simple(src, nil, nil, end_string) # We didn't find a closing batch! if !code || src.cur_char != '`' con.push_element(end_string + (code || '')) and return end # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}" src.ignore_chars num_ticks # Ignore at most one space if num_ticks > 1 && code[0, 1] == ' ' code = code[1..-1] end # drop last space if num_ticks > 1 && code[-1, 1] == ' ' code = code[0..-2] end # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} " con.push_element md_code(code) end
Source
# File lib/maruku/input/parse_span.rb, line 495 def read_inline_html(src, con) h = HTMLHelper.new begin # This is our current buffer in the context next_stuff = src.current_remaining_buffer consumed = 0 while true if consumed >= next_stuff.size maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con break end h.eat_this next_stuff[consumed].chr consumed += 1 break if h.is_finished? end src.ignore_chars(consumed) con.push_element md_html(h.stuff_you_read) rescue => e maruku_error "Bad html: \n" + e.inspect.gsub(/^/, '>'), src, con, "I will try to continue after bad HTML." con.push_char src.shift_char end end
Source
# File lib/maruku/input/parse_span.rb, line 562 def read_link(src, con) # we read the string and see what happens src.ignore_char # opening bracket children = read_span(src, EscapedCharInText, ']') src.ignore_char # closing bracket # ignore space if src.cur_char == ' ' && ['[', '('].include?(src.next_char) src.shift_char end case src.cur_char when '(' src.ignore_char # opening ( src.consume_whitespace url = read_url(src, [' ', "\t", ")"]) || '' src.consume_whitespace title = nil if src.cur_char != ')' # we have a title quote_char = src.cur_char title = read_quoted(src, con) if not title maruku_error 'Must quote title', src, con else # Tries to read a title with quotes:  # this is the most ugly thing in Markdown unless src.next_matches(/\s*\)/) # if there is not a closing par ), then read # the rest and guess it's title with quotes rest = read_simple(src, nil, ')', nil) # chop the closing char rest.chop! title << quote_char << rest end end end src.consume_whitespace closing = src.shift_char # closing ) if closing != ')' maruku_error 'Unclosed link', src, con, "No closing ): I will not create" + " the link for #{children.inspect}" con.push_elements children return end con.push_element md_im_link(children, url, title) when '[' # link ref ref_id = read_ref_id(src, con) if ref_id con.push_element md_link(children, ref_id) else maruku_error "Could not read ref_id", src, con, "I will not create the link for " + "#{children.inspect}" con.push_elements children return end else # empty [link] con.push_element md_link(children, nil) end end
Source
# File lib/maruku/input/parse_span.rb, line 407 def read_quoted(src, con) case src.cur_char when "'", '"' quote_char = src.shift_char # opening quote string = read_simple(src, EscapedCharInQuotes, quote_char) src.ignore_char # closing quote string else nil end end
Tries to read a quoted value. If stream does not start with ‘ or “, returns nil.
Source
# File lib/maruku/input/parse_span.rb, line 396 def read_quoted_or_unquoted(src, con, escaped, exit_on_chars) case src.cur_char when "'", '"' read_quoted(src, con) else read_simple(src, escaped, exit_on_chars, nil, false) end end
Source
# File lib/maruku/input/parse_span.rb, line 481 def read_ref_id(src, con) src.ignore_char # [ if m = src.read_regexp(/([^\]]*?)\]/) m[1] else nil end end
Reads a bracketed id “[refid]”. Consumes also both brackets.
Source
# File lib/maruku/input/parse_span.rb, line 424 def read_simple(src, escaped, exit_on_chars=nil, exit_on_strings=nil, warn=true) text = "" escaped = Array(escaped) exit_on_chars = Array(exit_on_chars) exit_on_strings = Array(exit_on_strings) while true c = src.cur_char break if exit_on_chars.include?(c) break if exit_on_strings.any? {|x| src.cur_chars_are x } case c when nil if warn maruku_error "String finished while reading (break on " + "#{(exit_on_chars + exit_on_strings).inspect})" + " already read: #{text.inspect}", src end break when "\\" d = src.next_char if escaped.include? d src.ignore_chars(2) text << d else text << src.shift_char end else text << src.shift_char end end text.empty? ? nil : text end
Reads a simple string (no formatting) until one of exit_on_chars, while escaping the escaped. If the string is empty, it returns nil. By default, raises on error if the string terminates unexpectedly. This can be by setting the last argument to false.
Source
# File lib/maruku/input/parse_span.rb, line 22 def read_span(src, escaped, exit_on_chars=nil, exit_on_strings=nil) escaped = Array(escaped) con = SpanContext.new dquote_state = squote_state = :closed c = d = prev_char = nil while true c = src.cur_char # This is only an optimization which cuts 50% of the time used. # (but you can't use a-zA-z in exit_on_chars) if c && c =~ /[[:alnum:]]/ con.push_char src.shift_char prev_char = c next end break if Array(exit_on_chars).include?(c) if Array(exit_on_strings).any? {|x| src.cur_chars_are x } # Special case: bold nested in italic break unless !(['*', '_'] & Array(exit_on_strings)).empty? && ['**', '__'].include?(src.cur_chars(2)) && !['***', '___'].include?(src.cur_chars(3)) end # check if there are extensions next if check_span_extensions(src, con) case c = src.cur_char when ' ' if src.cur_chars_are " \n" src.ignore_chars(3) con.push_element md_br prev_char = ' ' next elsif src.cur_chars_are ' >>' # closing guillemettes src.ignore_chars(3) con.push_element md_entity('nbsp') con.push_element md_entity('raquo') elsif src.cur_chars(5) =~ / '\d\ds/ # special case: '80s src.ignore_chars(2) con.push_space con.push_element md_entity('rsquo') elsif src.cur_chars_are " '" # opening single-quote src.ignore_chars(2) con.push_space con.push_element md_entity('lsquo') squote_state = :open else src.ignore_char con.push_space end when "\n", "\t" src.ignore_char con.push_space when '`' read_inline_code(src, con) when '<' # It could be: # 1) HTML "<div ..." # 2) HTML "<!-- ..." # 3) url "<http:// ", "<ftp:// ..." # 4) email "<andrea@... ", "<mailto:andrea@..." # 5) on itself! "a < b " # 6) Start of <<guillemettes>> case d = src.next_char when '<' # guillemettes if src.cur_chars_are '<< ' src.ignore_chars(3) con.push_element md_entity('laquo') con.push_element md_entity('nbsp') else src.ignore_chars(2) con.push_element md_entity('laquo') end when '!' if src.cur_chars_are '<!--' read_inline_html(src, con) else con.push_char src.shift_char end when '?' read_xml_instr_span(src, con) when ' ', "\t" con.push_char src.shift_char else if src.next_matches(/<mailto:/) || src.next_matches(/<[\w\.]+\@/) read_email_el(src, con) elsif src.next_matches(/<\w+:/) read_url_el(src, con) elsif src.next_matches(/<\w/) #puts "This is HTML: #{src.cur_chars(20)}" read_inline_html(src, con) else #puts "This is NOT HTML: #{src.cur_chars(20)}" con.push_char src.shift_char end end when '>' if src.next_char == '>' src.ignore_chars(2) con.push_element md_entity('raquo') else con.push_char src.shift_char end when "\\" d = src.next_char if d == "'" src.ignore_chars(2) con.push_element md_entity('apos') elsif d == '"' src.ignore_chars(2) con.push_element md_entity('quot') elsif escaped.include? d src.ignore_chars(2) con.push_char d else con.push_char src.shift_char end when '[' if markdown_extra? && src.next_char == '^' read_footnote_ref(src,con) elsif IgnoreWikiLinks && src.next_char == '[' con.push_char src.shift_char con.push_char src.shift_char else read_link(src, con) end when '!' if src.next_char == '[' read_image(src, con) else con.push_char src.shift_char end when '&' # named references if m = src.read_regexp(/\&(\w+);/) con.push_element md_entity(m[1]) # numeric elsif m = src.read_regexp(/\&\#(x)?(\w+);/) num = m[1] ? m[2].hex : m[2].to_i con.push_element md_entity(num) else con.push_char src.shift_char end when '*' if !src.next_char maruku_error "Opening * as last char.", src, con, 'Treating as literal' con.push_char src.shift_char else follows = src.cur_chars(4) if follows =~ /^\*\*\*[^\s\*]/ con.push_element read_emstrong(src, '***') elsif follows =~ /^\*\*[^\s\*]/ con.push_element read_strong(src, '**') elsif follows =~ /^\*[^\s\*]/ con.push_element read_em(src, '*') else # * is just a normal char con.push_char src.shift_char end end when '_' if !src.next_char maruku_error "Opening _ as last char", src, con, 'Treating as literal' con.push_char src.shift_char else # we don't want "mod_ruby" to start an emphasis # so we start one only if # 1) there's nothing else in the span (first char) # or 2) the last char was a space # or 3) the current string is empty #if con.elements.empty? || if con.is_end? # also, we check the next characters follows = src.cur_chars(4) if follows =~ /^\_\_\_[^\s\_]/ con.push_element read_emstrong(src, '___') elsif follows =~ /^\_\_[^\s\_]/ con.push_element read_strong(src, '__') elsif follows =~ /^\_[^\s\_]/ con.push_element read_em(src, '_') else # _ is just a normal char con.push_char src.shift_char end else # _ is just a normal char con.push_char src.shift_char end end when '{' # extension if ['#', '.', ':'].include? src.next_char src.ignore_char # { interpret_extension(src, con, '}') src.ignore_char # } else con.push_char src.shift_char end when nil maruku_error( ("Unclosed span (waiting for %s" + "#{exit_on_strings.inspect})") % [ exit_on_chars ? "#{exit_on_chars.inspect} or" : "" ], src, con) break when '-' # dashes if src.next_char == '-' if src.cur_chars_are '---' src.ignore_chars(3) con.push_element md_entity('mdash') else src.ignore_chars(2) con.push_element md_entity('ndash') end else con.push_char src.shift_char end when '.' # ellipses if src.cur_chars_are '...' src.ignore_chars(3) con.push_element md_entity('hellip') elsif src.cur_chars_are '. . .' src.ignore_chars(5) con.push_element md_entity('hellip') else con.push_char src.shift_char end when '"' if dquote_state == :closed dquote_state = :open src.ignore_char con.push_element md_entity('ldquo') else dquote_state = :closed src.ignore_char con.push_element md_entity('rdquo') end when "'" if src.cur_chars(4) =~ /'\d\ds/ # special case: '80s src.ignore_char con.push_element md_entity('rsquo') elsif squote_state == :open squote_state = :closed unless src.next_char =~ /[[:alpha:]]/ src.ignore_char con.push_element md_entity('rsquo') else if prev_char =~ /[[:alpha:]]/ src.ignore_char con.push_element md_entity('rsquo') else src.ignore_char con.push_element md_entity('lsquo') squote_state = :open end end else # normal text con.push_char src.shift_char end # end case prev_char = c end # end while true con.push_string_if_present # Assign IAL to elements merge_ial(con.elements, src, con) # Remove leading space if (s = con.elements.first).kind_of? String if s[0, 1] == ' ' con.elements[0] = s[1..-1] end con.elements.shift if s.empty? end con.elements.shift if (con.elements.first.kind_of?(String) && con.elements.first.empty?) # Remove final spaces if (s = con.elements.last).kind_of? String s.chop! if s[-1, 1] == ' ' con.elements.pop if s.empty? end con.elements end
This is the main loop for reading span elements
It’s long, but not complex or difficult to understand.
Source
# File lib/maruku/input/parse_span.rb, line 466 def read_strong(src, delim) src.ignore_chars(2) children = read_span(src, EscapedCharInText, nil, delim) src.ignore_chars(2) md_strong(children) end
Source
# File lib/maruku/input/parse_span.rb, line 380 def read_url(src, break_on) if ["'", '"'].include? src.cur_char maruku_error 'Invalid char for url', src end url = read_simple(src, nil, break_on) || '' if url[0, 1] == '<' && url[-1, 1] == '>' url = url[1, url.size-2] end return nil if url.empty? url end
Source
# File lib/maruku/input/parse_span.rb, line 363 def read_url_el(src,con) src.ignore_char # leading < url = read_simple(src, nil, '>') src.ignore_char # closing > con.push_element md_url(url) end
Source
# File lib/maruku/input/parse_span.rb, line 307 def read_xml_instr_span(src, con) src.ignore_chars(2) # starting <? # read target <?target code... ?> target = if m = src.read_regexp(/^(\w+)/) m[1] else # XML instructions are invalid without a target '' end delim = "?>" code = read_simple(src, nil, nil, delim) src.ignore_chars delim.size code = (code || "").strip con.push_element md_xml_instr(target, code) end
Private Instance Methods
Source
# File lib/maruku/attributes.rb, line 118 def ial?(e) e.is_a?(MDElement) && e.node_type == :ial end