chore: add virtual environment to repository
- Add the backend_service/venv virtual environment - Includes all Python dependency packages - Note: the virtual environment is ~393 MB and contains 12,655 files
@@ -0,0 +1,6 @@
"""A Python port of Markdown-It"""

__all__ = ("MarkdownIt",)
__version__ = "4.0.0"

from .main import MarkdownIt
@@ -0,0 +1 @@
from __future__ import annotations
@@ -0,0 +1,67 @@
# Copyright 2014 Mathias Bynens <https://mathiasbynens.be/>
# Copyright 2021 Taneli Hukkinen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import codecs
from collections.abc import Callable
import re

REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]")
REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]")


def encode(uni: str) -> str:
    return codecs.encode(uni, encoding="punycode").decode()


def decode(ascii: str) -> str:
    return codecs.decode(ascii, encoding="punycode")  # type: ignore


def map_domain(string: str, fn: Callable[[str], str]) -> str:
    parts = string.split("@")
    result = ""
    if len(parts) > 1:
        # In email addresses, only the domain name should be punycoded. Leave
        # the local part (i.e. everything up to `@`) intact.
        result = parts[0] + "@"
        string = parts[1]
    labels = REGEX_SEPARATORS.split(string)
    encoded = ".".join(fn(label) for label in labels)
    return result + encoded


def to_unicode(obj: str) -> str:
    def mapping(obj: str) -> str:
        if obj.startswith("xn--"):
            return decode(obj[4:].lower())
        return obj

    return map_domain(obj, mapping)


def to_ascii(obj: str) -> str:
    def mapping(obj: str) -> str:
        if REGEX_NON_ASCII.search(obj):
            return "xn--" + encode(obj)
        return obj

    return map_domain(obj, mapping)
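
For reference, a minimal sketch of how the punycode helpers above compose (illustrative only, not one of the committed files; the expected outputs assume CPython's built-in punycode codec used above):

from markdown_it._punycode import to_ascii, to_unicode

# map_domain leaves the local part of email-like strings intact,
# so only the domain after "@" is encoded.
print(to_ascii("bücher.example"))           # xn--bcher-kva.example
print(to_ascii("user@münchen.de"))          # user@xn--mnchen-3ya.de
print(to_unicode("xn--bcher-kva.example"))  # bücher.example
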
@@ -0,0 +1,110 @@
#!/usr/bin/env python
"""
CLI interface to markdown-it-py

Parse one or more markdown files, convert each to HTML, and print to stdout.
"""

from __future__ import annotations

import argparse
from collections.abc import Iterable, Sequence
import sys

from markdown_it import __version__
from markdown_it.main import MarkdownIt

version_str = f"markdown-it-py [version {__version__}]"


def main(args: Sequence[str] | None = None) -> int:
    namespace = parse_args(args)
    if namespace.filenames:
        convert(namespace.filenames)
    else:
        interactive()
    return 0


def convert(filenames: Iterable[str]) -> None:
    for filename in filenames:
        convert_file(filename)


def convert_file(filename: str) -> None:
    """
    Parse a Markdown file and dump the output to stdout.
    """
    try:
        with open(filename, encoding="utf8", errors="ignore") as fin:
            rendered = MarkdownIt().render(fin.read())
            print(rendered, end="")
    except OSError:
        sys.stderr.write(f'Cannot open file "{filename}".\n')
        sys.exit(1)


def interactive() -> None:
    """
    Parse user input, dump to stdout, rinse and repeat.
    Python REPL style.
    """
    print_heading()
    contents = []
    more = False
    while True:
        try:
            prompt, more = ("... ", True) if more else (">>> ", True)
            contents.append(input(prompt) + "\n")
        except EOFError:
            print("\n" + MarkdownIt().render("\n".join(contents)), end="")
            more = False
            contents = []
        except KeyboardInterrupt:
            print("\nExiting.")
            break


def parse_args(args: Sequence[str] | None) -> argparse.Namespace:
    """Parse input CLI arguments."""
    parser = argparse.ArgumentParser(
        description="Parse one or more markdown files, "
        "convert each to HTML, and print to stdout",
        # NOTE: Remember to update README.md w/ the output of `markdown-it -h`
        epilog=(
            f"""
Interactive:

  $ markdown-it
  markdown-it-py [version {__version__}] (interactive)
  Type Ctrl-D to complete input, or Ctrl-C to exit.
  >>> # Example
  ... > markdown *input*
  ...
  <h1>Example</h1>
  <blockquote>
  <p>markdown <em>input</em></p>
  </blockquote>

Batch:

  $ markdown-it README.md README.footer.md > index.html
"""
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--version", action="version", version=version_str)
    parser.add_argument(
        "filenames", nargs="*", help="specify an optional list of files to convert"
    )
    return parser.parse_args(args)


def print_heading() -> None:
    print(f"{version_str} (interactive)")
    print("Type Ctrl-D to complete input, or Ctrl-C to exit.")


if __name__ == "__main__":
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
@@ -0,0 +1,5 @@
"""HTML5 entities map: { name -> characters }."""

import html.entities

entities = {name.rstrip(";"): chars for name, chars in html.entities.html5.items()}
@@ -0,0 +1,69 @@
"""List of valid html block names, according to the CommonMark spec
http://jgm.github.io/CommonMark/spec.html#html-blocks
"""

# see https://spec.commonmark.org/0.31.2/#html-blocks
block_names = [
    "address",
    "article",
    "aside",
    "base",
    "basefont",
    "blockquote",
    "body",
    "caption",
    "center",
    "col",
    "colgroup",
    "dd",
    "details",
    "dialog",
    "dir",
    "div",
    "dl",
    "dt",
    "fieldset",
    "figcaption",
    "figure",
    "footer",
    "form",
    "frame",
    "frameset",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "head",
    "header",
    "hr",
    "html",
    "iframe",
    "legend",
    "li",
    "link",
    "main",
    "menu",
    "menuitem",
    "nav",
    "noframes",
    "ol",
    "optgroup",
    "option",
    "p",
    "param",
    "search",
    "section",
    "summary",
    "table",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "title",
    "tr",
    "track",
    "ul",
]
@@ -0,0 +1,39 @@
"""Regexps to match html elements"""

import re

attr_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"

unquoted = "[^\"'=<>`\\x00-\\x20]+"
single_quoted = "'[^']*'"
double_quoted = '"[^"]*"'

attr_value = "(?:" + unquoted + "|" + single_quoted + "|" + double_quoted + ")"

attribute = "(?:\\s+" + attr_name + "(?:\\s*=\\s*" + attr_value + ")?)"

open_tag = "<[A-Za-z][A-Za-z0-9\\-]*" + attribute + "*\\s*\\/?>"

close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>"
comment = "<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->"
processing = "<[?][\\s\\S]*?[?]>"
declaration = "<![A-Za-z][^>]*>"
cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"

HTML_TAG_RE = re.compile(
    "^(?:"
    + open_tag
    + "|"
    + close_tag
    + "|"
    + comment
    + "|"
    + processing
    + "|"
    + declaration
    + "|"
    + cdata
    + ")"
)
HTML_OPEN_CLOSE_TAG_STR = "^(?:" + open_tag + "|" + close_tag + ")"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)
@@ -0,0 +1,81 @@
from __future__ import annotations

from collections.abc import Callable
from contextlib import suppress
import re
from urllib.parse import quote, unquote, urlparse, urlunparse  # noqa: F401

import mdurl

from .. import _punycode

RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:")


def normalizeLink(url: str) -> str:
    """Normalize destination URLs in links

    ::

        [label]:   destination   'title'
                   ^^^^^^^^^^^
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)

    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    if parsed.hostname and (
        not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR
    ):
        with suppress(Exception):
            parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname))

    return mdurl.encode(mdurl.format(parsed))


def normalizeLinkText(url: str) -> str:
    """Normalize autolink content

    ::

        <destination>
         ~~~~~~~~~~~
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)

    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    if parsed.hostname and (
        not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR
    ):
        with suppress(Exception):
            parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname))

    # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
    return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%")


BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")
GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")


def validateLink(url: str, validator: Callable[[str], bool] | None = None) -> bool:
    """Validate URL link is allowed in output.

    This validator can prohibit more than really needed to prevent XSS.
    It's a tradeoff to keep code simple and to be secure by default.

    Note: url should be normalized at this point, and existing entities decoded.
    """
    if validator is not None:
        return validator(url)
    url = url.strip().lower()
    return bool(GOOD_DATA_RE.search(url)) if BAD_PROTO_RE.search(url) else True
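
A minimal sketch of the default `validateLink` policy above (illustrative only, not part of the commit):

from markdown_it.common.normalize_url import validateLink

print(validateLink("https://example.com/a?b=c"))    # True
print(validateLink("javascript:alert(1)"))          # False - blocked protocol
print(validateLink("data:text/html,<b>x</b>"))      # False - only data:image/* passes
print(validateLink("data:image/png;base64,iVBOR"))  # True
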
@@ -0,0 +1,313 @@
"""Utilities for parsing source text"""

from __future__ import annotations

import re
from re import Match
from typing import TypeVar
import unicodedata

from .entities import entities


def charCodeAt(src: str, pos: int) -> int | None:
    """
    Returns the Unicode value of the character at the specified location.

    @param - index The zero-based index of the desired character.
    If there is no character at the specified index, None is returned.

    This was added for compatibility with python
    """
    try:
        return ord(src[pos])
    except IndexError:
        return None


def charStrAt(src: str, pos: int) -> str | None:
    """
    Returns the character at the specified location.

    @param - index The zero-based index of the desired character.
    If there is no character at the specified index, None is returned.

    This was added for compatibility with python
    """
    try:
        return src[pos]
    except IndexError:
        return None


_ItemTV = TypeVar("_ItemTV")


def arrayReplaceAt(
    src: list[_ItemTV], pos: int, newElements: list[_ItemTV]
) -> list[_ItemTV]:
    """
    Remove an element from the array and put another array at that position.
    Useful for some operations with tokens
    """
    return src[:pos] + newElements + src[pos + 1 :]


def isValidEntityCode(c: int) -> bool:
    # broken sequence
    if c >= 0xD800 and c <= 0xDFFF:
        return False
    # never used
    if c >= 0xFDD0 and c <= 0xFDEF:
        return False
    if ((c & 0xFFFF) == 0xFFFF) or ((c & 0xFFFF) == 0xFFFE):
        return False
    # control codes
    if c >= 0x00 and c <= 0x08:
        return False
    if c == 0x0B:
        return False
    if c >= 0x0E and c <= 0x1F:
        return False
    if c >= 0x7F and c <= 0x9F:
        return False
    # out of range
    return not (c > 0x10FFFF)


def fromCodePoint(c: int) -> str:
    """Convert ordinal to unicode.

    Note, in the original Javascript two string characters were required,
    for codepoints larger than `0xFFFF`.
    But Python 3 can represent any unicode codepoint in one character.
    """
    return chr(c)


# UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])')
# ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE)
UNESCAPE_ALL_RE = re.compile(
    r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});",
    re.IGNORECASE,
)
DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})")
DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE)


def replaceEntityPattern(match: str, name: str) -> str:
    """Convert HTML entity patterns,
    see https://spec.commonmark.org/0.30/#entity-references
    """
    if name in entities:
        return entities[name]

    code: None | int = None
    if pat := DIGITAL_ENTITY_BASE10_RE.fullmatch(name):
        code = int(pat.group(1), 10)
    elif pat := DIGITAL_ENTITY_BASE16_RE.fullmatch(name):
        code = int(pat.group(1), 16)

    if code is not None and isValidEntityCode(code):
        return fromCodePoint(code)

    return match


def unescapeAll(string: str) -> str:
    def replacer_func(match: Match[str]) -> str:
        escaped = match.group(1)
        if escaped:
            return escaped
        entity = match.group(2)
        return replaceEntityPattern(match.group(), entity)

    if "\\" not in string and "&" not in string:
        return string
    return UNESCAPE_ALL_RE.sub(replacer_func, string)


ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")


def stripEscape(string: str) -> str:
    """Strip escape \\ characters"""
    return ESCAPE_CHAR.sub(r"\1", string)


def escapeHtml(raw: str) -> str:
    """Replace special characters "&", "<", ">" and '"' with HTML-safe sequences."""
    # like html.escape, but without escaping single quotes
    raw = raw.replace("&", "&amp;")  # Must be done first!
    raw = raw.replace("<", "&lt;")
    raw = raw.replace(">", "&gt;")
    raw = raw.replace('"', "&quot;")
    return raw


# //////////////////////////////////////////////////////////////////////////////

REGEXP_ESCAPE_RE = re.compile(r"[.?*+^$[\]\\(){}|-]")


def escapeRE(string: str) -> str:
    string = REGEXP_ESCAPE_RE.sub("\\$&", string)
    return string


# //////////////////////////////////////////////////////////////////////////////


def isSpace(code: int | None) -> bool:
    """Check if character code is a whitespace."""
    return code in (0x09, 0x20)


def isStrSpace(ch: str | None) -> bool:
    """Check if character is a whitespace."""
    return ch in ("\t", " ")


MD_WHITESPACE = {
    0x09,  # \t
    0x0A,  # \n
    0x0B,  # \v
    0x0C,  # \f
    0x0D,  # \r
    0x20,  # space
    0xA0,
    0x1680,
    0x202F,
    0x205F,
    0x3000,
}


def isWhiteSpace(code: int) -> bool:
    r"""Zs (unicode class) || [\t\f\v\r\n]"""
    if code >= 0x2000 and code <= 0x200A:
        return True
    return code in MD_WHITESPACE


# //////////////////////////////////////////////////////////////////////////////


def isPunctChar(ch: str) -> bool:
    """Check if character is a punctuation character."""
    return unicodedata.category(ch).startswith(("P", "S"))


MD_ASCII_PUNCT = {
    0x21,  # /* ! */
    0x22,  # /* " */
    0x23,  # /* # */
    0x24,  # /* $ */
    0x25,  # /* % */
    0x26,  # /* & */
    0x27,  # /* ' */
    0x28,  # /* ( */
    0x29,  # /* ) */
    0x2A,  # /* * */
    0x2B,  # /* + */
    0x2C,  # /* , */
    0x2D,  # /* - */
    0x2E,  # /* . */
    0x2F,  # /* / */
    0x3A,  # /* : */
    0x3B,  # /* ; */
    0x3C,  # /* < */
    0x3D,  # /* = */
    0x3E,  # /* > */
    0x3F,  # /* ? */
    0x40,  # /* @ */
    0x5B,  # /* [ */
    0x5C,  # /* \ */
    0x5D,  # /* ] */
    0x5E,  # /* ^ */
    0x5F,  # /* _ */
    0x60,  # /* ` */
    0x7B,  # /* { */
    0x7C,  # /* | */
    0x7D,  # /* } */
    0x7E,  # /* ~ */
}


def isMdAsciiPunct(ch: int) -> bool:
    """Markdown ASCII punctuation characters.

    ::

        !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \\, ], ^, _, `, {, |, }, or ~

    See http://spec.commonmark.org/0.15/#ascii-punctuation-character

    Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.

    """
    return ch in MD_ASCII_PUNCT


def normalizeReference(string: str) -> str:
    """Helper to unify [reference labels]."""
    # Trim and collapse whitespace
    #
    string = re.sub(r"\s+", " ", string.strip())

    # In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
    # fixed in v12 (couldn't find any details).
    #
    # So treat this one as a special case
    # (remove this when node v10 is no longer supported).
    #
    # if ('ẞ'.toLowerCase() === 'Ṿ') {
    #   str = str.replace(/ẞ/g, 'ß')
    # }

    # .toLowerCase().toUpperCase() should get rid of all differences
    # between letter variants.
    #
    # Simple .toLowerCase() doesn't normalize 125 code points correctly,
    # and .toUpperCase doesn't normalize 6 of them (list of exceptions:
    # İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently
    # uppercased versions).
    #
    # Here's an example showing how it happens. Let's take the greek letter theta:
    # uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ)
    #
    # Unicode entries:
    # 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8
    # 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
    # 03D1;GREEK THETA SYMBOL;Ll;0;L;<compat> 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398
    # 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L;<compat> 0398;;;;N;;;;03B8
    #
    # Case-insensitive comparison should treat all of them as equivalent.
    #
    # But .toLowerCase() doesn't change ϑ (it's already lowercase),
    # and .toUpperCase() doesn't change ϴ (already uppercase).
    #
    # Applying first lower then upper case normalizes any character:
    # '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
    #
    # Note: this is equivalent to unicode case folding; unicode normalization
    # is a different step that is not required here.
    #
    # Final result should be uppercased, because it's later stored in an object
    # (this avoids a conflict with Object.prototype members,
    # most notably, `__proto__`)
    #
    return string.lower().upper()


LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)


def isLinkOpen(string: str) -> bool:
    return bool(LINK_OPEN_RE.search(string))


def isLinkClose(string: str) -> bool:
    return bool(LINK_CLOSE_RE.search(string))
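
A short sketch of the two most commonly used helpers above (illustrative only, not part of the commit):

from markdown_it.common.utils import unescapeAll, normalizeReference

# Backslash escapes and HTML entities are resolved in a single pass.
print(unescapeAll(r"\*literal\* &amp; &#x27;quoted&#x27;"))  # *literal* & 'quoted'

# Reference labels are trimmed, whitespace-collapsed, then case-folded
# via lower().upper(), making lookups case-insensitive.
print(normalizeReference("  Foo \t Bar "))  # FOO BAR
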
@@ -0,0 +1,6 @@
"""Functions for parsing links"""

__all__ = ("parseLinkDestination", "parseLinkLabel", "parseLinkTitle")
from .parse_link_destination import parseLinkDestination
from .parse_link_label import parseLinkLabel
from .parse_link_title import parseLinkTitle
@@ -0,0 +1,83 @@
"""
Parse link destination
"""

from ..common.utils import charCodeAt, unescapeAll


class _Result:
    __slots__ = ("ok", "pos", "str")

    def __init__(self) -> None:
        self.ok = False
        self.pos = 0
        self.str = ""


def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
    start = pos
    result = _Result()

    if charCodeAt(string, pos) == 0x3C:  # /* < */
        pos += 1
        while pos < maximum:
            code = charCodeAt(string, pos)
            if code == 0x0A:  # /* \n */
                return result
            if code == 0x3C:  # /* < */
                return result
            if code == 0x3E:  # /* > */
                result.pos = pos + 1
                result.str = unescapeAll(string[start + 1 : pos])
                result.ok = True
                return result

            if code == 0x5C and pos + 1 < maximum:  # /* \ */
                pos += 2
                continue

            pos += 1

        # no closing '>'
        return result

    # this is the ... } else { ... branch of the JS original

    level = 0
    while pos < maximum:
        code = charCodeAt(string, pos)

        if code is None or code == 0x20:
            break

        # ascii control characters
        if code < 0x20 or code == 0x7F:
            break

        if code == 0x5C and pos + 1 < maximum:
            if charCodeAt(string, pos + 1) == 0x20:
                break
            pos += 2
            continue

        if code == 0x28:  # /* ( */
            level += 1
            if level > 32:
                return result

        if code == 0x29:  # /* ) */
            if level == 0:
                break
            level -= 1

        pos += 1

    if start == pos:
        return result
    if level != 0:
        return result

    result.str = unescapeAll(string[start:pos])
    result.pos = pos
    result.ok = True
    return result
@@ -0,0 +1,44 @@
"""
Parse link label

this function assumes that the first character ("[") already matches;
returns the end of the label

"""

from markdown_it.rules_inline import StateInline


def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int:
    labelEnd = -1
    oldPos = state.pos
    found = False

    state.pos = start + 1
    level = 1

    while state.pos < state.posMax:
        marker = state.src[state.pos]
        if marker == "]":
            level -= 1
            if level == 0:
                found = True
                break

        prevPos = state.pos
        state.md.inline.skipToken(state)
        if marker == "[":
            if prevPos == state.pos - 1:
                # increase level if we find text `[`,
                # which is not a part of any token
                level += 1
            elif disableNested:
                state.pos = oldPos
                return -1
    if found:
        labelEnd = state.pos

    # restore old state
    state.pos = oldPos

    return labelEnd
@@ -0,0 +1,75 @@
"""Parse link title"""

from ..common.utils import charCodeAt, unescapeAll


class _State:
    __slots__ = ("can_continue", "marker", "ok", "pos", "str")

    def __init__(self) -> None:
        self.ok = False
        """if `True`, this is a valid link title"""
        self.can_continue = False
        """if `True`, this link title can be continued on the next line"""
        self.pos = 0
        """if `ok`, it's the position of the first character after the closing marker"""
        self.str = ""
        """if `ok`, it's the unescaped title"""
        self.marker = 0
        """expected closing marker character code"""

    def __str__(self) -> str:
        return self.str


def parseLinkTitle(
    string: str, start: int, maximum: int, prev_state: _State | None = None
) -> _State:
    """Parse a link title within `string` in the [start, maximum] range,
    or continue previous parsing if `prev_state` is defined (equal to the result of the last execution).
    """
    pos = start
    state = _State()

    if prev_state is not None:
        # this is a continuation of a previous parseLinkTitle call on the next line,
        # used in reference links only
        state.str = prev_state.str
        state.marker = prev_state.marker
    else:
        if pos >= maximum:
            return state

        marker = charCodeAt(string, pos)

        # /* " */ /* ' */ /* ( */
        if marker != 0x22 and marker != 0x27 and marker != 0x28:
            return state

        start += 1
        pos += 1

        # if opening marker is "(", switch it to closing marker ")"
        if marker == 0x28:
            marker = 0x29

        state.marker = marker

    while pos < maximum:
        code = charCodeAt(string, pos)
        if code == state.marker:
            state.pos = pos + 1
            state.str += unescapeAll(string[start:pos])
            state.ok = True
            return state
        elif code == 0x28 and state.marker == 0x29:  # /* ( */ /* ) */
            return state
        elif code == 0x5C and pos + 1 < maximum:  # /* \ */
            pos += 1

        pos += 1

    # no closing marker found, but this link title may continue on the next line (for references)
    state.can_continue = True
    state.str += unescapeAll(string[start:pos])
    return state
@@ -0,0 +1,350 @@
from __future__ import annotations

from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
from contextlib import contextmanager
from typing import Any, Literal, overload

from . import helpers, presets
from .common import normalize_url, utils
from .parser_block import ParserBlock
from .parser_core import ParserCore
from .parser_inline import ParserInline
from .renderer import RendererHTML, RendererProtocol
from .rules_core.state_core import StateCore
from .token import Token
from .utils import EnvType, OptionsDict, OptionsType, PresetType

try:
    import linkify_it
except ModuleNotFoundError:
    linkify_it = None


_PRESETS: dict[str, PresetType] = {
    "default": presets.default.make(),
    "js-default": presets.js_default.make(),
    "zero": presets.zero.make(),
    "commonmark": presets.commonmark.make(),
    "gfm-like": presets.gfm_like.make(),
}


class MarkdownIt:
    def __init__(
        self,
        config: str | PresetType = "commonmark",
        options_update: Mapping[str, Any] | None = None,
        *,
        renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
    ):
        """Main parser class

        :param config: name of configuration to load or a pre-defined dictionary
        :param options_update: dictionary that will be merged into ``config["options"]``
        :param renderer_cls: the class to load as the renderer:
            ``self.renderer = renderer_cls(self)``
        """
        # add modules
        self.utils = utils
        self.helpers = helpers

        # initialise classes
        self.inline = ParserInline()
        self.block = ParserBlock()
        self.core = ParserCore()
        self.renderer = renderer_cls(self)
        self.linkify = linkify_it.LinkifyIt() if linkify_it else None

        # set the configuration
        if options_update and not isinstance(options_update, Mapping):
            # catch signature change where renderer_cls was not used as a key-word
            raise TypeError(
                f"options_update should be a mapping: {options_update}"
                "\n(Perhaps you intended this to be the renderer_cls?)"
            )
        self.configure(config, options_update=options_update)

    def __repr__(self) -> str:
        return f"{self.__class__.__module__}.{self.__class__.__name__}()"

    @overload
    def __getitem__(self, name: Literal["inline"]) -> ParserInline: ...

    @overload
    def __getitem__(self, name: Literal["block"]) -> ParserBlock: ...

    @overload
    def __getitem__(self, name: Literal["core"]) -> ParserCore: ...

    @overload
    def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: ...

    @overload
    def __getitem__(self, name: str) -> Any: ...

    def __getitem__(self, name: str) -> Any:
        return {
            "inline": self.inline,
            "block": self.block,
            "core": self.core,
            "renderer": self.renderer,
        }[name]

    def set(self, options: OptionsType) -> None:
        """Set parser options (in the same format as in the constructor).
        Probably, you will never need it, but you can change options after the constructor call.

        __Note:__ To achieve the best possible performance, don't modify a
        `markdown-it` instance's options on the fly. If you need multiple configurations,
        it's best to create multiple instances and initialize each with a separate config.
        """
        self.options = OptionsDict(options)

    def configure(
        self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
    ) -> MarkdownIt:
        """Batch load of all options and component settings.
        This is an internal method, and you probably will not need it.
        But if you do - see available presets and data structure
        [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)

        We strongly recommend using presets instead of direct config loads.
        That will give better compatibility with future versions.
        """
        if isinstance(presets, str):
            if presets not in _PRESETS:
                raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
            config = _PRESETS[presets]
        else:
            config = presets

        if not config:
            raise ValueError("Wrong `markdown-it` config, can't be empty")

        options = config.get("options", {}) or {}
        if options_update:
            options = {**options, **options_update}  # type: ignore

        self.set(options)  # type: ignore

        if "components" in config:
            for name, component in config["components"].items():
                rules = component.get("rules", None)
                if rules:
                    self[name].ruler.enableOnly(rules)
                rules2 = component.get("rules2", None)
                if rules2:
                    self[name].ruler2.enableOnly(rules2)

        return self

    def get_all_rules(self) -> dict[str, list[str]]:
        """Return the names of all rules (active or not)."""
        rules = {
            chain: self[chain].ruler.get_all_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_all_rules()
        return rules

    def get_active_rules(self) -> dict[str, list[str]]:
        """Return the names of all active rules."""
        rules = {
            chain: self[chain].ruler.get_active_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_active_rules()
        return rules

    def enable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """Enable a list of rules. (chainable)

        :param names: rule name or list of rule names to enable.
        :param ignoreInvalid: set `True` to ignore errors when a rule is not found.

        It will automatically find the appropriate components
        containing rules with the given names. If a rule is not found, and `ignoreInvalid`
        is not set, an exception is raised.

        Example::

            md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')

        """
        result = []

        if isinstance(names, str):
            names = [names]

        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.enable(names, True))
        result.extend(self.inline.ruler2.enable(names, True))

        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")

        return self

    def disable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)

        :param names: rule name or list of rule names to disable.
        :param ignoreInvalid: set `True` to ignore errors when a rule is not found.

        """
        result = []

        if isinstance(names, str):
            names = [names]

        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.disable(names, True))
        result.extend(self.inline.ruler2.disable(names, True))

        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
        return self

    @contextmanager
    def reset_rules(self) -> Generator[None, None, None]:
        """A context manager that will reset the currently enabled rules on exit."""
        chain_rules = self.get_active_rules()
        yield
        for chain, rules in chain_rules.items():
            if chain != "inline2":
                self[chain].ruler.enableOnly(rules)
        self.inline.ruler2.enableOnly(chain_rules["inline2"])

    def add_render_rule(
        self, name: str, function: Callable[..., Any], fmt: str = "html"
    ) -> None:
        """Add a rule for rendering a particular Token type.

        Only applied when ``renderer.__output__ == fmt``
        """
        if self.renderer.__output__ == fmt:
            self.renderer.rules[name] = function.__get__(self.renderer)  # type: ignore

    def use(
        self, plugin: Callable[..., None], *params: Any, **options: Any
    ) -> MarkdownIt:
        """Load the specified plugin with given params into the current parser instance. (chainable)

        It's just sugar to call `plugin(md, params)` with currying.

        Example::

            def func(tokens, idx):
                tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
            md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)

        """
        plugin(self, *params, **options)
        return self

    def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
        """Parse the source string to a token stream

        :param src: source string
        :param env: environment sandbox

        Parse the input string and return a list of block tokens (the special token type
        "inline" will contain a list of inline tokens).

        `env` is used to pass data between "distributed" rules and return additional
        metadata like reference info, needed for the renderer. It also can be used to
        inject data in specific cases. Usually, you will be ok to pass `{}`,
        and then pass the updated object to the renderer.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        self.core.process(state)
        return state.tokens

    def render(self, src: str, env: EnvType | None = None) -> Any:
        """Render markdown string into html. It does all magic for you :).

        :param src: source string
        :param env: environment sandbox
        :returns: The output of the loaded renderer

        `env` can be used to inject additional metadata (`{}` by default).
        But you will not need it with high probability. See also comment
        in [[MarkdownIt.parse]].
        """
        env = {} if env is None else env
        return self.renderer.render(self.parse(src, env), self.options, env)

    def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
        """The same as [[MarkdownIt.parse]] but skip all block rules.

        :param src: source string
        :param env: environment sandbox

        It returns the block tokens list with a single `inline` element, containing the
        parsed inline tokens in the `children` property. Also updates the `env` object.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        state.inlineMode = True
        self.core.process(state)
        return state.tokens

    def renderInline(self, src: str, env: EnvType | None = None) -> Any:
        """Similar to [[MarkdownIt.render]] but for single paragraph content.

        :param src: source string
        :param env: environment sandbox

        The result will NOT be wrapped into `<p>` tags.
        """
        env = {} if env is None else env
        return self.renderer.render(self.parseInline(src, env), self.options, env)

    # link methods

    def validateLink(self, url: str) -> bool:
        """Validate if the URL link is allowed in output.

        This validator can prohibit more than really needed to prevent XSS.
        It's a tradeoff to keep code simple and to be secure by default.

        Note: the url should be normalized at this point, and existing entities decoded.
        """
        return normalize_url.validateLink(url)

    def normalizeLink(self, url: str) -> str:
        """Normalize destination URLs in links

        ::

            [label]:   destination   'title'
                       ^^^^^^^^^^^
        """
        return normalize_url.normalizeLink(url)

    def normalizeLinkText(self, link: str) -> str:
        """Normalize autolink content

        ::

            <destination>
             ~~~~~~~~~~~
        """
        return normalize_url.normalizeLinkText(link)
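
A minimal usage sketch of the `MarkdownIt` class above (illustrative only, not part of the commit; `table` is a block rule that the commonmark preset ships disabled):

from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"breaks": True}).enable("table")
html = md.render("# Hi\n")   # "<h1>Hi</h1>\n"
tokens = md.parse("# Hi\n")  # block token stream, e.g. for custom renderers

# Temporarily change the active rule set, restoring it on exit.
with md.reset_rules():
    md.disable("emphasis")
    md.render("*kept literal*")
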
@@ -0,0 +1,113 @@
"""Block-level tokenizer."""

from __future__ import annotations

from collections.abc import Callable
import logging
from typing import TYPE_CHECKING

from . import rules_block
from .ruler import Ruler
from .rules_block.state_block import StateBlock
from .token import Token
from .utils import EnvType

if TYPE_CHECKING:
    from markdown_it import MarkdownIt

LOGGER = logging.getLogger(__name__)


RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)

`silent` disables token generation, useful for lookahead.
"""

_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
    # First 2 params - rule name & source. Secondary array - list of rules,
    # which can be terminated by this one.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code, []),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference, []),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading, []),
    ("paragraph", rules_block.paragraph, []),
]


class ParserBlock:
    """
    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keep configuration of block rules.
    """

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncBlockType]()
        for name, rule, alt in _rules:
            self.ruler.push(name, rule, {"alt": alt})

    def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
        """Generate tokens for input range."""
        rules = self.ruler.getRules("")
        line = startLine
        maxNesting = state.md.options.maxNesting
        hasEmptyLines = False

        while line < endLine:
            state.line = line = state.skipEmptyLines(line)
            if line >= endLine:
                break
            if state.sCount[line] < state.blkIndent:
                # Termination condition for nested calls.
                # Nested calls currently used for blockquotes & lists
                break
            if state.level >= maxNesting:
                # If nesting level exceeded - skip tail to the end.
                # That's not an ordinary situation and we should not care about content.
                state.line = endLine
                break

            # Try all possible rules.
            # On success, rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            for rule in rules:
                if rule(state, line, endLine, False):
                    break

            # set state.tight if we had an empty line before current tag
            # i.e. latest empty line should not count
            state.tight = not hasEmptyLines

            line = state.line

            # paragraph might "eat" one newline after it in nested lists
            if (line - 1) < endLine and state.isEmpty(line - 1):
                hasEmptyLines = True

            if line < endLine and state.isEmpty(line):
                hasEmptyLines = True
                line += 1
                state.line = line

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`."""
        if not src:
            return None
        state = StateBlock(src, md, env, outTokens)
        self.tokenize(state, state.line, state.lineMax)
        return state.tokens
@@ -0,0 +1,46 @@
"""
* class Core
*
* Top-level rules executor. Glues block/inline parsers and does intermediate
* transformations.
"""

from __future__ import annotations

from collections.abc import Callable

from .ruler import Ruler
from .rules_core import (
    block,
    inline,
    linkify,
    normalize,
    replace,
    smartquotes,
    text_join,
)
from .rules_core.state_core import StateCore

RuleFuncCoreType = Callable[[StateCore], None]

_rules: list[tuple[str, RuleFuncCoreType]] = [
    ("normalize", normalize),
    ("block", block),
    ("inline", inline),
    ("linkify", linkify),
    ("replacements", replace),
    ("smartquotes", smartquotes),
    ("text_join", text_join),
]


class ParserCore:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncCoreType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)

    def process(self, state: StateCore) -> None:
        """Executes core chain rules."""
        for rule in self.ruler.getRules(""):
            rule(state)
@@ -0,0 +1,148 @@
"""Tokenizes paragraph content."""

from __future__ import annotations

from collections.abc import Callable
from typing import TYPE_CHECKING

from . import rules_inline
from .ruler import Ruler
from .rules_inline.state_inline import StateInline
from .token import Token
from .utils import EnvType

if TYPE_CHECKING:
    from markdown_it import MarkdownIt


# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool)

`silent` disables token generation, useful for lookahead.
"""
_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# Note `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # rules for pairs separate '**' into its own text tokens, which may be left unused,
    # rule below merges unused segments back with the rest of the text
    ("fragments_join", rules_inline.fragments_join),
]


class ParserInline:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip single token by running all rules in validation mode;
        returns `True` if any rule reported success
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following case,
            # when an amount of `[` is exactly equal to `maxNesting + 1`:
            #
            # [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when CM standard will allow nested links
            # (we can replace it by preventing links from being parsed in
            # validation mode)
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for input range."""
        ok = False
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules.
            # On success, rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return true

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process input string and push inline tokens into `tokens`"""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
@@ -0,0 +1,48 @@
- package: markdown-it/markdown-it
  version: 14.1.0
  commit: 0fe7ccb4b7f30236fb05f623be6924961d296d3d
  date: Mar 19, 2024
  notes:
    - Rename variables that use python built-in names, e.g.
      - `max` -> `maximum`
      - `len` -> `length`
      - `str` -> `string`
    - |
      Convert JS `for` loops to `while` loops;
      this is generally the main difference between the two code bases,
      because in python you can't do e.g. `for {i=1;i<x;i++} {}`
    - |
      `env` is a common Python dictionary, and so does not have attribute access to keys,
      as with JavaScript dictionaries.
      `options` have attribute access only to core markdownit configuration options
    - |
      `Token.attrs` is a dictionary, instead of a list of lists.
      Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
      but in Python 3.7+ the order of dictionaries is guaranteed.
      One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
      to manipulate `Token.attrs`, which have an identical signature to those upstream.
    - Use python version of `charCodeAt`
    - |
      Use `str` units instead of `int`s to represent Unicode codepoints.
      This provides a significant performance boost
    - |
      In markdown_it/rules_block/reference.py,
      record the line range in state.env["references"] and add state.env["duplicate_refs"].
      This is to allow renderers to report on issues regarding references
    - |
      The `MarkdownIt.__init__` signature is slightly different for updating options,
      since you must always specify the config first, e.g.
      use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
    - The default configuration preset for `MarkdownIt` is "commonmark" not "default"
    - Allow a custom renderer to be passed to `MarkdownIt`
    - |
      change render method signatures
      `func(tokens, idx, options, env, slf)` to
      `func(self, tokens, idx, options, env)`
    - |
      Extensions add render methods by format
      `MarkdownIt.add_render_rule(name, function, fmt="html")`,
      rather than `MarkdownIt.renderer.rules[name] = function`,
      and renderers should declare a class property `__output__ = "html"`.
      This allows for extensibility to more than just HTML renderers
    - inline tokens in tables are assigned a map (this is helpful for propagation to children)
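
A minimal sketch of the two Python-specific signatures described in the notes above (illustrative only, not part of the commit; the CSS class name is invented for the example):

from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"html": False})  # config name first, then option overrides

# Render rules receive `self` first and are registered per output format.
def render_em_open(self, tokens, idx, options, env):
    return '<em class="demo">'

md.add_render_rule("em_open", render_em_open)  # fmt defaults to "html"
print(md.render("*hi*"))  # <p><em class="demo">hi</em></p>
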
@@ -0,0 +1,28 @@
__all__ = ("commonmark", "default", "gfm_like", "js_default", "zero")

from ..utils import PresetType
from . import commonmark, default, zero

js_default = default


class gfm_like:  # noqa: N801
    """GitHub Flavoured Markdown (GFM) like.

    This adds the linkify, table and strikethrough components to CommonMark.

    Note, it lacks task-list items and raw HTML filtering,
    to meet the full GFM specification
    (see https://github.github.com/gfm/#autolinks-extension-).
    """

    @staticmethod
    def make() -> PresetType:
        config = commonmark.make()
        config["components"]["core"]["rules"].append("linkify")
        config["components"]["block"]["rules"].append("table")
        config["components"]["inline"]["rules"].extend(["strikethrough", "linkify"])
        config["components"]["inline"]["rules2"].append("strikethrough")
        config["options"]["linkify"] = True
        config["options"]["html"] = True
        return config
@@ -0,0 +1,75 @@
|
||||
"""Commonmark default options.
|
||||
|
||||
This differs to presets.default,
|
||||
primarily in that it allows HTML and does not enable components:
|
||||
|
||||
- block: table
|
||||
- inline: strikethrough
|
||||
"""
|
||||
|
||||
from ..utils import PresetType
|
||||
|
||||
|
||||
def make() -> PresetType:
|
||||
return {
|
||||
"options": {
|
||||
"maxNesting": 20, # Internal protection, recursion limit
|
||||
"html": True, # Enable HTML tags in source,
|
||||
# this is just a shorthand for .enable(["html_inline", "html_block"])
|
||||
# used by the linkify rule:
|
||||
"linkify": False, # autoconvert URL-like texts to links
|
||||
# used by the replacements and smartquotes rules
|
||||
# Enable some language-neutral replacements + quotes beautification
|
||||
"typographer": False,
|
||||
# used by the smartquotes rule:
|
||||
# Double + single quotes replacement pairs, when typographer enabled,
|
||||
# and smartquotes on. Could be either a String or an Array.
|
||||
#
|
||||
# For example, you can use '«»„“' for Russian, '„“‚‘' for German,
|
||||
# and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
|
||||
"quotes": "\u201c\u201d\u2018\u2019", # /* “”‘’ */
|
||||
# Renderer specific; these options are used directly in the HTML renderer
|
||||
"xhtmlOut": True, # Use '/' to close single tags (<br />)
|
||||
"breaks": False, # Convert '\n' in paragraphs into <br>
|
||||
"langPrefix": "language-", # CSS language prefix for fenced blocks
|
||||
# Highlighter function. Should return escaped HTML,
|
||||
# or '' if the source string is not changed and should be escaped externally.
|
||||
# If result starts with <pre... internal wrapper is skipped.
|
||||
#
|
||||
# function (/*str, lang, attrs*/) { return ''; }
|
||||
#
|
||||
"highlight": None,
|
||||
},
|
||||
"components": {
|
||||
"core": {"rules": ["normalize", "block", "inline", "text_join"]},
|
||||
"block": {
|
||||
"rules": [
|
||||
"blockquote",
|
||||
"code",
|
||||
"fence",
|
||||
"heading",
|
||||
"hr",
|
||||
"html_block",
|
||||
"lheading",
|
||||
"list",
|
||||
"reference",
|
||||
"paragraph",
|
||||
]
|
||||
},
|
||||
"inline": {
|
||||
"rules": [
|
||||
"autolink",
|
||||
"backticks",
|
||||
"emphasis",
|
||||
"entity",
|
||||
"escape",
|
||||
"html_inline",
|
||||
"image",
|
||||
"link",
|
||||
"newline",
|
||||
"text",
|
||||
],
|
||||
"rules2": ["balance_pairs", "emphasis", "fragments_join"],
|
||||
},
|
||||
},
|
||||
}
|
||||
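A hedged sketch of overriding one of these options at construction time; the two-argument MarkdownIt(config, options_update) form is assumed here:

from markdown_it import MarkdownIt

# Start from the commonmark preset, but convert '\n' in paragraphs to <br>:
md = MarkdownIt("commonmark", {"breaks": True})
md.render("a\nb")  # -> "<p>a<br />\nb</p>\n" (xhtmlOut is True in this preset)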
@@ -0,0 +1,36 @@
|
||||
"""markdown-it default options."""
|
||||
|
||||
from ..utils import PresetType
|
||||
|
||||
|
||||
def make() -> PresetType:
|
||||
return {
|
||||
"options": {
|
||||
"maxNesting": 100, # Internal protection, recursion limit
|
||||
"html": False, # Enable HTML tags in source
|
||||
# this is just a shorthand for .disable(["html_inline", "html_block"])
|
||||
# used by the linkify rule:
|
||||
"linkify": False, # autoconvert URL-like texts to links
|
||||
# used by the replacements and smartquotes rules:
|
||||
# Enable some language-neutral replacements + quotes beautification
|
||||
"typographer": False,
|
||||
# used by the smartquotes rule:
|
||||
# Double + single quotes replacement pairs, when typographer enabled,
|
||||
# and smartquotes on. Could be either a String or an Array.
|
||||
# For example, you can use '«»„“' for Russian, '„“‚‘' for German,
|
||||
# and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
|
||||
"quotes": "\u201c\u201d\u2018\u2019", # /* “”‘’ */
|
||||
# Renderer specific; these options are used directly in the HTML renderer
|
||||
"xhtmlOut": False, # Use '/' to close single tags (<br />)
|
||||
"breaks": False, # Convert '\n' in paragraphs into <br>
|
||||
"langPrefix": "language-", # CSS language prefix for fenced blocks
|
||||
# Highlighter function. Should return escaped HTML,
|
||||
# or '' if the source string is not changed and should be escaped externally.
|
||||
# If result starts with <pre... internal wrapper is skipped.
|
||||
#
|
||||
# function (/*str, lang, attrs*/) { return ''; }
|
||||
#
|
||||
"highlight": None,
|
||||
},
|
||||
"components": {"core": {}, "block": {}, "inline": {}},
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
"Zero" preset, with nothing enabled. Useful for manual configuring of simple
|
||||
modes. For example, to parse bold/italic only.
|
||||
"""
|
||||
|
||||
from ..utils import PresetType
|
||||
|
||||
|
||||
def make() -> PresetType:
|
||||
return {
|
||||
"options": {
|
||||
"maxNesting": 20, # Internal protection, recursion limit
|
||||
"html": False, # Enable HTML tags in source
|
||||
# this is just a shorthand for .disable(["html_inline", "html_block"])
|
||||
# used by the linkify rule:
|
||||
"linkify": False, # autoconvert URL-like texts to links
|
||||
# used by the replacements and smartquotes rules:
|
||||
# Enable some language-neutral replacements + quotes beautification
|
||||
"typographer": False,
|
||||
# used by the smartquotes rule:
|
||||
# Double + single quotes replacement pairs, when typographer enabled,
|
||||
# and smartquotes on. Could be either a String or an Array.
|
||||
# For example, you can use '«»„“' for Russian, '„“‚‘' for German,
|
||||
# and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
|
||||
"quotes": "\u201c\u201d\u2018\u2019", # /* “”‘’ */
|
||||
# Renderer specific; these options are used directly in the HTML renderer
|
||||
"xhtmlOut": False, # Use '/' to close single tags (<br />)
|
||||
"breaks": False, # Convert '\n' in paragraphs into <br>
|
||||
"langPrefix": "language-", # CSS language prefix for fenced blocks
|
||||
# Highlighter function. Should return escaped HTML,
|
||||
# or '' if the source string is not changed and should be escaped externally.
|
||||
# If result starts with <pre... internal wrapper is skipped.
|
||||
# function (/*str, lang, attrs*/) { return ''; }
|
||||
"highlight": None,
|
||||
},
|
||||
"components": {
|
||||
"core": {"rules": ["normalize", "block", "inline", "text_join"]},
|
||||
"block": {"rules": ["paragraph"]},
|
||||
"inline": {
|
||||
"rules": ["text"],
|
||||
"rules2": ["balance_pairs", "fragments_join"],
|
||||
},
|
||||
},
|
||||
}
|
||||
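As the docstring suggests, a minimal sketch of parsing bold/italic only on top of this preset:

from markdown_it import MarkdownIt

md = MarkdownIt("zero").enable("emphasis")
md.render("*hi*")  # -> "<p><em>hi</em></p>\n"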
@@ -0,0 +1 @@
|
||||
# Marker file for PEP 561
|
||||
@@ -0,0 +1,336 @@
|
||||
"""
|
||||
class Renderer
|
||||
|
||||
Generates HTML from a parsed token stream. Each instance has an independent
|
||||
copy of the rules, which can be rewritten with ease. Also, you can add new
|
||||
rules if you create a plugin that adds new token types.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
import inspect
|
||||
from typing import Any, ClassVar, Protocol
|
||||
|
||||
from .common.utils import escapeHtml, unescapeAll
|
||||
from .token import Token
|
||||
from .utils import EnvType, OptionsDict
|
||||
|
||||
|
||||
class RendererProtocol(Protocol):
|
||||
__output__: ClassVar[str]
|
||||
|
||||
def render(
|
||||
self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
|
||||
) -> Any: ...
|
||||
|
||||
|
||||
class RendererHTML(RendererProtocol):
|
||||
"""Contains render rules for tokens. Can be updated and extended.
|
||||
|
||||
Example:
|
||||
|
||||
Each rule is called as an independent static function with a fixed signature:
|
||||
|
||||
::
|
||||
|
||||
class Renderer:
|
||||
def token_type_name(self, tokens, idx, options, env):
|
||||
# ...
|
||||
return renderedHTML
|
||||
|
||||
::
|
||||
|
||||
class CustomRenderer(RendererHTML):
|
||||
def strong_open(self, tokens, idx, options, env):
|
||||
return '<b>'
|
||||
def strong_close(self, tokens, idx, options, env):
|
||||
return '</b>'
|
||||
|
||||
md = MarkdownIt(renderer_cls=CustomRenderer)
|
||||
|
||||
result = md.render(...)
|
||||
|
||||
See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
|
||||
for more details and examples.
|
||||
"""
|
||||
|
||||
__output__ = "html"
|
||||
|
||||
def __init__(self, parser: Any = None):
|
||||
self.rules = {
|
||||
k: v
|
||||
for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
|
||||
if not (k.startswith("render") or k.startswith("_"))
|
||||
}
|
||||
|
||||
def render(
|
||||
self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
"""Takes token stream and generates HTML.
|
||||
|
||||
:param tokens: list of block tokens to render
|
||||
:param options: params of parser instance
|
||||
:param env: additional data from parsed input
|
||||
|
||||
"""
|
||||
result = ""
|
||||
|
||||
for i, token in enumerate(tokens):
|
||||
if token.type == "inline":
|
||||
if token.children:
|
||||
result += self.renderInline(token.children, options, env)
|
||||
elif token.type in self.rules:
|
||||
result += self.rules[token.type](tokens, i, options, env)
|
||||
else:
|
||||
result += self.renderToken(tokens, i, options, env)
|
||||
|
||||
return result
|
||||
|
||||
def renderInline(
|
||||
self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
"""The same as ``render``, but for single token of `inline` type.
|
||||
|
||||
:param tokens: list of inline tokens to render
|
||||
:param options: params of parser instance
|
||||
:param env: additional data from parsed input (references, for example)
|
||||
"""
|
||||
result = ""
|
||||
|
||||
for i, token in enumerate(tokens):
|
||||
if token.type in self.rules:
|
||||
result += self.rules[token.type](tokens, i, options, env)
|
||||
else:
|
||||
result += self.renderToken(tokens, i, options, env)
|
||||
|
||||
return result
|
||||
|
||||
def renderToken(
|
||||
self,
|
||||
tokens: Sequence[Token],
|
||||
idx: int,
|
||||
options: OptionsDict,
|
||||
env: EnvType,
|
||||
) -> str:
|
||||
"""Default token renderer.
|
||||
|
||||
Can be overridden by a custom function.
|
||||
|
||||
:param idx: token index to render
|
||||
:param options: params of parser instance
|
||||
"""
|
||||
result = ""
|
||||
needLf = False
|
||||
token = tokens[idx]
|
||||
|
||||
# Tight list paragraphs
|
||||
if token.hidden:
|
||||
return ""
|
||||
|
||||
# Insert a newline between hidden paragraph and subsequent opening
|
||||
# block-level tag.
|
||||
#
|
||||
# For example, here we should insert a newline before blockquote:
|
||||
# - a
|
||||
# >
|
||||
#
|
||||
if token.block and token.nesting != -1 and idx and tokens[idx - 1].hidden:
|
||||
result += "\n"
|
||||
|
||||
# Add token name, e.g. `<img`
|
||||
result += ("</" if token.nesting == -1 else "<") + token.tag
|
||||
|
||||
# Encode attributes, e.g. `<img src="foo"`
|
||||
result += self.renderAttrs(token)
|
||||
|
||||
# Add a slash for self-closing tags, e.g. `<img src="foo" /`
|
||||
if token.nesting == 0 and options["xhtmlOut"]:
|
||||
result += " /"
|
||||
|
||||
# Check if we need to add a newline after this tag
|
||||
if token.block:
|
||||
needLf = True
|
||||
|
||||
if token.nesting == 1 and (idx + 1 < len(tokens)):
|
||||
nextToken = tokens[idx + 1]
|
||||
|
||||
if nextToken.type == "inline" or nextToken.hidden:
|
||||
# Block-level tag containing an inline tag.
|
||||
#
|
||||
needLf = False
|
||||
|
||||
elif nextToken.nesting == -1 and nextToken.tag == token.tag:
|
||||
# Opening tag + closing tag of the same type. E.g. `<li></li>`.
|
||||
#
|
||||
needLf = False
|
||||
|
||||
result += ">\n" if needLf else ">"
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def renderAttrs(token: Token) -> str:
|
||||
"""Render token attributes to string."""
|
||||
result = ""
|
||||
|
||||
for key, value in token.attrItems():
|
||||
result += " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'
|
||||
|
||||
return result
|
||||
|
||||
def renderInlineAsText(
|
||||
self,
|
||||
tokens: Sequence[Token] | None,
|
||||
options: OptionsDict,
|
||||
env: EnvType,
|
||||
) -> str:
|
||||
"""Special kludge for image `alt` attributes to conform CommonMark spec.
|
||||
|
||||
Don't try to use it! The spec requires `alt` content to be shown with markup
|
||||
stripped, instead of simply escaped.
|
||||
|
||||
:param tokens: list of inline tokens to render
|
||||
:param options: params of parser instance
|
||||
:param env: additional data from parsed input
|
||||
"""
|
||||
result = ""
|
||||
|
||||
for token in tokens or []:
|
||||
if token.type == "text":
|
||||
result += token.content
|
||||
elif token.type == "image":
|
||||
if token.children:
|
||||
result += self.renderInlineAsText(token.children, options, env)
|
||||
elif token.type == "softbreak":
|
||||
result += "\n"
|
||||
|
||||
return result
|
||||
|
||||
###################################################
|
||||
|
||||
def code_inline(
|
||||
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
token = tokens[idx]
|
||||
return (
|
||||
"<code"
|
||||
+ self.renderAttrs(token)
|
||||
+ ">"
|
||||
+ escapeHtml(tokens[idx].content)
|
||||
+ "</code>"
|
||||
)
|
||||
|
||||
def code_block(
|
||||
self,
|
||||
tokens: Sequence[Token],
|
||||
idx: int,
|
||||
options: OptionsDict,
|
||||
env: EnvType,
|
||||
) -> str:
|
||||
token = tokens[idx]
|
||||
|
||||
return (
|
||||
"<pre"
|
||||
+ self.renderAttrs(token)
|
||||
+ "><code>"
|
||||
+ escapeHtml(tokens[idx].content)
|
||||
+ "</code></pre>\n"
|
||||
)
|
||||
|
||||
def fence(
|
||||
self,
|
||||
tokens: Sequence[Token],
|
||||
idx: int,
|
||||
options: OptionsDict,
|
||||
env: EnvType,
|
||||
) -> str:
|
||||
token = tokens[idx]
|
||||
info = unescapeAll(token.info).strip() if token.info else ""
|
||||
langName = ""
|
||||
langAttrs = ""
|
||||
|
||||
if info:
|
||||
arr = info.split(maxsplit=1)
|
||||
langName = arr[0]
|
||||
if len(arr) == 2:
|
||||
langAttrs = arr[1]
|
||||
|
||||
if options.highlight:
|
||||
highlighted = options.highlight(
|
||||
token.content, langName, langAttrs
|
||||
) or escapeHtml(token.content)
|
||||
else:
|
||||
highlighted = escapeHtml(token.content)
|
||||
|
||||
if highlighted.startswith("<pre"):
|
||||
return highlighted + "\n"
|
||||
|
||||
# If language exists, inject class gently, without modifying original token.
|
||||
# Maybe one day we will add .deepClone() for token and simplify this part, but
|
||||
# now we prefer to keep things local.
|
||||
if info:
|
||||
# Fake token just to render attributes
|
||||
tmpToken = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
|
||||
tmpToken.attrJoin("class", options.langPrefix + langName)
|
||||
|
||||
return (
|
||||
"<pre><code"
|
||||
+ self.renderAttrs(tmpToken)
|
||||
+ ">"
|
||||
+ highlighted
|
||||
+ "</code></pre>\n"
|
||||
)
|
||||
|
||||
return (
|
||||
"<pre><code"
|
||||
+ self.renderAttrs(token)
|
||||
+ ">"
|
||||
+ highlighted
|
||||
+ "</code></pre>\n"
|
||||
)
|
||||
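A hedged sketch of plugging a highlighter into the logic above. The name highlight_code is illustrative; any callable with this three-argument shape should do:

from markdown_it import MarkdownIt

def highlight_code(code: str, lang: str, attrs: str) -> str:
    # Return escaped HTML; return "" to fall back to the default escaping.
    # Returning a string starting with "<pre" skips the internal wrapper.
    return ""

md = MarkdownIt("commonmark", {"highlight": highlight_code})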
|
||||
def image(
|
||||
self,
|
||||
tokens: Sequence[Token],
|
||||
idx: int,
|
||||
options: OptionsDict,
|
||||
env: EnvType,
|
||||
) -> str:
|
||||
token = tokens[idx]
|
||||
|
||||
# "alt" attr MUST be set, even if empty. Because it's mandatory and
|
||||
# should be placed on proper position for tests.
|
||||
if token.children:
|
||||
token.attrSet("alt", self.renderInlineAsText(token.children, options, env))
|
||||
else:
|
||||
token.attrSet("alt", "")
|
||||
|
||||
return self.renderToken(tokens, idx, options, env)
|
||||
|
||||
def hardbreak(
|
||||
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
return "<br />\n" if options.xhtmlOut else "<br>\n"
|
||||
|
||||
def softbreak(
|
||||
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
return (
|
||||
("<br />\n" if options.xhtmlOut else "<br>\n") if options.breaks else "\n"
|
||||
)
|
||||
|
||||
def text(
|
||||
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
return escapeHtml(tokens[idx].content)
|
||||
|
||||
def html_block(
|
||||
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
return tokens[idx].content
|
||||
|
||||
def html_inline(
|
||||
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
|
||||
) -> str:
|
||||
return tokens[idx].content
|
||||
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
class Ruler
|
||||
|
||||
Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
|
||||
[[MarkdownIt#inline]] to manage sequences of functions (rules):
|
||||
|
||||
- keep rules in defined order
|
||||
- assign the name to each rule
|
||||
- enable/disable rules
|
||||
- add/replace rules
|
||||
- allow assigning rules to additional named chains (in the same order)
|
||||
- caching lists of active rules
|
||||
|
||||
You will not need to use this class directly unless you write plugins. For simple
|
||||
rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
|
||||
[[MarkdownIt.use]].
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Generic, TypedDict, TypeVar
|
||||
import warnings
|
||||
|
||||
from .utils import EnvType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
|
||||
class StateBase:
|
||||
def __init__(self, src: str, md: MarkdownIt, env: EnvType):
|
||||
self.src = src
|
||||
self.env = env
|
||||
self.md = md
|
||||
|
||||
@property
|
||||
def src(self) -> str:
|
||||
return self._src
|
||||
|
||||
@src.setter
|
||||
def src(self, value: str) -> None:
|
||||
self._src = value
|
||||
self._srcCharCode: tuple[int, ...] | None = None
|
||||
|
||||
@property
|
||||
def srcCharCode(self) -> tuple[int, ...]:
|
||||
warnings.warn(
|
||||
"StateBase.srcCharCode is deprecated. Use StateBase.src instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if self._srcCharCode is None:
|
||||
self._srcCharCode = tuple(ord(c) for c in self._src)
|
||||
return self._srcCharCode
|
||||
|
||||
|
||||
class RuleOptionsType(TypedDict, total=False):
|
||||
alt: list[str]
|
||||
|
||||
|
||||
RuleFuncTv = TypeVar("RuleFuncTv")
|
||||
"""A rule function, whose signature is dependent on the state type."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Rule(Generic[RuleFuncTv]):
|
||||
name: str
|
||||
enabled: bool
|
||||
fn: RuleFuncTv = field(repr=False)
|
||||
alt: list[str]
|
||||
|
||||
|
||||
class Ruler(Generic[RuleFuncTv]):
|
||||
def __init__(self) -> None:
|
||||
# List of added rules.
|
||||
self.__rules__: list[Rule[RuleFuncTv]] = []
|
||||
# Cached rule chains.
|
||||
# First level - chain name, '' for default.
|
||||
# Second level - digital anchor for fast filtering by charcodes.
|
||||
self.__cache__: dict[str, list[RuleFuncTv]] | None = None
|
||||
|
||||
def __find__(self, name: str) -> int:
|
||||
"""Find rule index by name"""
|
||||
for i, rule in enumerate(self.__rules__):
|
||||
if rule.name == name:
|
||||
return i
|
||||
return -1
|
||||
|
||||
def __compile__(self) -> None:
|
||||
"""Build rules lookup cache"""
|
||||
chains = {""}
|
||||
# collect unique names
|
||||
for rule in self.__rules__:
|
||||
if not rule.enabled:
|
||||
continue
|
||||
for name in rule.alt:
|
||||
chains.add(name)
|
||||
self.__cache__ = {}
|
||||
for chain in chains:
|
||||
self.__cache__[chain] = []
|
||||
for rule in self.__rules__:
|
||||
if not rule.enabled:
|
||||
continue
|
||||
if chain and (chain not in rule.alt):
|
||||
continue
|
||||
self.__cache__[chain].append(rule.fn)
|
||||
|
||||
def at(
|
||||
self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None
|
||||
) -> None:
|
||||
"""Replace rule by name with new function & options.
|
||||
|
||||
:param ruleName: rule name to replace.
|
||||
:param fn: new rule function.
|
||||
:param options: new rule options (not mandatory).
|
||||
:raises: KeyError if name not found
|
||||
"""
|
||||
index = self.__find__(ruleName)
|
||||
options = options or {}
|
||||
if index == -1:
|
||||
raise KeyError(f"Parser rule not found: {ruleName}")
|
||||
self.__rules__[index].fn = fn
|
||||
self.__rules__[index].alt = options.get("alt", [])
|
||||
self.__cache__ = None
|
||||
|
||||
def before(
|
||||
self,
|
||||
beforeName: str,
|
||||
ruleName: str,
|
||||
fn: RuleFuncTv,
|
||||
options: RuleOptionsType | None = None,
|
||||
) -> None:
|
||||
"""Add new rule to chain before one with given name.
|
||||
|
||||
:param beforeName: new rule will be added before this one.
|
||||
:param ruleName: name of the added rule.
|
||||
:param fn: new rule function.
|
||||
:param options: new rule options (not mandatory).
|
||||
:raises: KeyError if name not found
|
||||
"""
|
||||
index = self.__find__(beforeName)
|
||||
options = options or {}
|
||||
if index == -1:
|
||||
raise KeyError(f"Parser rule not found: {beforeName}")
|
||||
self.__rules__.insert(
|
||||
index, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", []))
|
||||
)
|
||||
self.__cache__ = None
|
||||
|
||||
def after(
|
||||
self,
|
||||
afterName: str,
|
||||
ruleName: str,
|
||||
fn: RuleFuncTv,
|
||||
options: RuleOptionsType | None = None,
|
||||
) -> None:
|
||||
"""Add new rule to chain after one with given name.
|
||||
|
||||
:param afterName: new rule will be added after this one.
|
||||
:param ruleName: name of the added rule.
|
||||
:param fn: new rule function.
|
||||
:param options: new rule options (not mandatory).
|
||||
:raises: KeyError if name not found
|
||||
"""
|
||||
index = self.__find__(afterName)
|
||||
options = options or {}
|
||||
if index == -1:
|
||||
raise KeyError(f"Parser rule not found: {afterName}")
|
||||
self.__rules__.insert(
|
||||
index + 1, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", []))
|
||||
)
|
||||
self.__cache__ = None
|
||||
|
||||
def push(
|
||||
self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None
|
||||
) -> None:
|
||||
"""Push new rule to the end of chain.
|
||||
|
||||
:param ruleName: name of the added rule.
|
||||
:param fn: new rule function.
|
||||
:param options: new rule options (not mandatory).
|
||||
|
||||
"""
|
||||
self.__rules__.append(
|
||||
Rule[RuleFuncTv](ruleName, True, fn, (options or {}).get("alt", []))
|
||||
)
|
||||
self.__cache__ = None
|
||||
|
||||
def enable(
|
||||
self, names: str | Iterable[str], ignoreInvalid: bool = False
|
||||
) -> list[str]:
|
||||
"""Enable rules with given names.
|
||||
|
||||
:param names: name or list of rule names to enable.
|
||||
:param ignoreInvalid: ignore errors when rule not found
|
||||
:raises: KeyError if name not found and not ignoreInvalid
|
||||
:return: list of found rule names
|
||||
"""
|
||||
if isinstance(names, str):
|
||||
names = [names]
|
||||
result: list[str] = []
|
||||
for name in names:
|
||||
idx = self.__find__(name)
|
||||
if (idx < 0) and ignoreInvalid:
|
||||
continue
|
||||
if (idx < 0) and not ignoreInvalid:
|
||||
raise KeyError(f"Rules manager: invalid rule name {name}")
|
||||
self.__rules__[idx].enabled = True
|
||||
result.append(name)
|
||||
self.__cache__ = None
|
||||
return result
|
||||
|
||||
def enableOnly(
|
||||
self, names: str | Iterable[str], ignoreInvalid: bool = False
|
||||
) -> list[str]:
|
||||
"""Enable rules with given names, and disable everything else.
|
||||
|
||||
:param names: name or list of rule names to enable.
|
||||
:param ignoreInvalid: ignore errors when rule not found
|
||||
:raises: KeyError if name not found and not ignoreInvalid
|
||||
:return: list of found rule names
|
||||
"""
|
||||
if isinstance(names, str):
|
||||
names = [names]
|
||||
for rule in self.__rules__:
|
||||
rule.enabled = False
|
||||
return self.enable(names, ignoreInvalid)
|
||||
|
||||
def disable(
|
||||
self, names: str | Iterable[str], ignoreInvalid: bool = False
|
||||
) -> list[str]:
|
||||
"""Disable rules with given names.
|
||||
|
||||
:param names: name or list of rule names to disable.
|
||||
:param ignoreInvalid: ignore errors when rule not found
|
||||
:raises: KeyError if name not found and not ignoreInvalid
|
||||
:return: list of found rule names
|
||||
"""
|
||||
if isinstance(names, str):
|
||||
names = [names]
|
||||
result = []
|
||||
for name in names:
|
||||
idx = self.__find__(name)
|
||||
if (idx < 0) and ignoreInvalid:
|
||||
continue
|
||||
if (idx < 0) and not ignoreInvalid:
|
||||
raise KeyError(f"Rules manager: invalid rule name {name}")
|
||||
self.__rules__[idx].enabled = False
|
||||
result.append(name)
|
||||
self.__cache__ = None
|
||||
return result
|
||||
|
||||
def getRules(self, chainName: str = "") -> list[RuleFuncTv]:
|
||||
"""Return array of active functions (rules) for given chain name.
|
||||
It analyzes the rules configuration, compiles the cache if it does not exist, and returns the result.
|
||||
|
||||
Default chain name is `''` (empty string). It can't be skipped.
|
||||
That's done intentionally, to keep signature monomorphic for high speed.
|
||||
|
||||
"""
|
||||
if self.__cache__ is None:
|
||||
self.__compile__()
|
||||
assert self.__cache__ is not None
|
||||
# Chain can be empty, if rules are disabled. But we still have to return a list.
|
||||
return self.__cache__.get(chainName, []) or []
|
||||
|
||||
def get_all_rules(self) -> list[str]:
|
||||
"""Return all available rule names."""
|
||||
return [r.name for r in self.__rules__]
|
||||
|
||||
def get_active_rules(self) -> list[str]:
|
||||
"""Return the active rule names."""
|
||||
return [r.name for r in self.__rules__ if r.enabled]
|
||||
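A minimal sketch of extending a chain with this class; the rule name my_rule and its always-failing body are placeholders:

from markdown_it import MarkdownIt
from markdown_it.rules_block import StateBlock

def my_rule(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    return False  # a real rule would consume lines and push tokens

md = MarkdownIt("commonmark")
md.block.ruler.before("paragraph", "my_rule", my_rule)
assert "my_rule" in md.block.ruler.get_all_rules()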
@@ -0,0 +1,27 @@
|
||||
__all__ = (
|
||||
"StateBlock",
|
||||
"blockquote",
|
||||
"code",
|
||||
"fence",
|
||||
"heading",
|
||||
"hr",
|
||||
"html_block",
|
||||
"lheading",
|
||||
"list_block",
|
||||
"paragraph",
|
||||
"reference",
|
||||
"table",
|
||||
)
|
||||
|
||||
from .blockquote import blockquote
|
||||
from .code import code
|
||||
from .fence import fence
|
||||
from .heading import heading
|
||||
from .hr import hr
|
||||
from .html_block import html_block
|
||||
from .lheading import lheading
|
||||
from .list import list_block
|
||||
from .paragraph import paragraph
|
||||
from .reference import reference
|
||||
from .state_block import StateBlock
|
||||
from .table import table
|
||||
@@ -0,0 +1,299 @@
|
||||
# Block quotes
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug(
|
||||
"entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
|
||||
)
|
||||
|
||||
oldLineMax = state.lineMax
|
||||
pos = state.bMarks[startLine] + state.tShift[startLine]
|
||||
max = state.eMarks[startLine]
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
# check the block quote marker
|
||||
try:
|
||||
if state.src[pos] != ">":
|
||||
return False
|
||||
except IndexError:
|
||||
return False
|
||||
pos += 1
|
||||
|
||||
# we know that it's going to be a valid blockquote,
|
||||
# so no point trying to find the end of it in silent mode
|
||||
if silent:
|
||||
return True
|
||||
|
||||
# set offset past spaces and ">"
|
||||
initial = offset = state.sCount[startLine] + 1
|
||||
|
||||
try:
|
||||
second_char: str | None = state.src[pos]
|
||||
except IndexError:
|
||||
second_char = None
|
||||
|
||||
# skip one optional space after '>'
|
||||
if second_char == " ":
|
||||
# ' > test '
|
||||
# ^ -- position start of line here:
|
||||
pos += 1
|
||||
initial += 1
|
||||
offset += 1
|
||||
adjustTab = False
|
||||
spaceAfterMarker = True
|
||||
elif second_char == "\t":
|
||||
spaceAfterMarker = True
|
||||
|
||||
if (state.bsCount[startLine] + offset) % 4 == 3:
|
||||
# ' >\t test '
|
||||
# ^ -- position start of line here (tab has width==1)
|
||||
pos += 1
|
||||
initial += 1
|
||||
offset += 1
|
||||
adjustTab = False
|
||||
else:
|
||||
# ' >\t test '
|
||||
# ^ -- position start of line here + shift bsCount slightly
|
||||
# to make extra space appear
|
||||
adjustTab = True
|
||||
|
||||
else:
|
||||
spaceAfterMarker = False
|
||||
|
||||
oldBMarks = [state.bMarks[startLine]]
|
||||
state.bMarks[startLine] = pos
|
||||
|
||||
while pos < max:
|
||||
ch = state.src[pos]
|
||||
|
||||
if isStrSpace(ch):
|
||||
if ch == "\t":
|
||||
offset += (
|
||||
4
|
||||
- (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
|
||||
)
|
||||
else:
|
||||
offset += 1
|
||||
|
||||
else:
|
||||
break
|
||||
|
||||
pos += 1
|
||||
|
||||
oldBSCount = [state.bsCount[startLine]]
|
||||
state.bsCount[startLine] = (
|
||||
state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
|
||||
)
|
||||
|
||||
lastLineEmpty = pos >= max
|
||||
|
||||
oldSCount = [state.sCount[startLine]]
|
||||
state.sCount[startLine] = offset - initial
|
||||
|
||||
oldTShift = [state.tShift[startLine]]
|
||||
state.tShift[startLine] = pos - state.bMarks[startLine]
|
||||
|
||||
terminatorRules = state.md.block.ruler.getRules("blockquote")
|
||||
|
||||
oldParentType = state.parentType
|
||||
state.parentType = "blockquote"
|
||||
|
||||
# Search the end of the block
|
||||
#
|
||||
# Block ends with either:
|
||||
# 1. an empty line outside:
|
||||
# ```
|
||||
# > test
|
||||
#
|
||||
# ```
|
||||
# 2. an empty line inside:
|
||||
# ```
|
||||
# >
|
||||
# test
|
||||
# ```
|
||||
# 3. another tag:
|
||||
# ```
|
||||
# > test
|
||||
# - - -
|
||||
# ```
|
||||
|
||||
# for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
|
||||
nextLine = startLine + 1
|
||||
while nextLine < endLine:
|
||||
# check if it's outdented, i.e. it's inside list item and indented
|
||||
# less than said list item:
|
||||
#
|
||||
# ```
|
||||
# 1. anything
|
||||
# > current blockquote
|
||||
# 2. checking this line
|
||||
# ```
|
||||
isOutdented = state.sCount[nextLine] < state.blkIndent
|
||||
|
||||
pos = state.bMarks[nextLine] + state.tShift[nextLine]
|
||||
max = state.eMarks[nextLine]
|
||||
|
||||
if pos >= max:
|
||||
# Case 1: line is not inside the blockquote, and this line is empty.
|
||||
break
|
||||
|
||||
evaluatesTrue = state.src[pos] == ">" and not isOutdented
|
||||
pos += 1
|
||||
if evaluatesTrue:
|
||||
# This line is inside the blockquote.
|
||||
|
||||
# set offset past spaces and ">"
|
||||
initial = offset = state.sCount[nextLine] + 1
|
||||
|
||||
try:
|
||||
next_char: str | None = state.src[pos]
|
||||
except IndexError:
|
||||
next_char = None
|
||||
|
||||
# skip one optional space after '>'
|
||||
if next_char == " ":
|
||||
# ' > test '
|
||||
# ^ -- position start of line here:
|
||||
pos += 1
|
||||
initial += 1
|
||||
offset += 1
|
||||
adjustTab = False
|
||||
spaceAfterMarker = True
|
||||
elif next_char == "\t":
|
||||
spaceAfterMarker = True
|
||||
|
||||
if (state.bsCount[nextLine] + offset) % 4 == 3:
|
||||
# ' >\t test '
|
||||
# ^ -- position start of line here (tab has width==1)
|
||||
pos += 1
|
||||
initial += 1
|
||||
offset += 1
|
||||
adjustTab = False
|
||||
else:
|
||||
# ' >\t test '
|
||||
# ^ -- position start of line here + shift bsCount slightly
|
||||
# to make extra space appear
|
||||
adjustTab = True
|
||||
|
||||
else:
|
||||
spaceAfterMarker = False
|
||||
|
||||
oldBMarks.append(state.bMarks[nextLine])
|
||||
state.bMarks[nextLine] = pos
|
||||
|
||||
while pos < max:
|
||||
ch = state.src[pos]
|
||||
|
||||
if isStrSpace(ch):
|
||||
if ch == "\t":
|
||||
offset += (
|
||||
4
|
||||
- (
|
||||
offset
|
||||
+ state.bsCount[nextLine]
|
||||
+ (1 if adjustTab else 0)
|
||||
)
|
||||
% 4
|
||||
)
|
||||
else:
|
||||
offset += 1
|
||||
else:
|
||||
break
|
||||
|
||||
pos += 1
|
||||
|
||||
lastLineEmpty = pos >= max
|
||||
|
||||
oldBSCount.append(state.bsCount[nextLine])
|
||||
state.bsCount[nextLine] = (
|
||||
state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
|
||||
)
|
||||
|
||||
oldSCount.append(state.sCount[nextLine])
|
||||
state.sCount[nextLine] = offset - initial
|
||||
|
||||
oldTShift.append(state.tShift[nextLine])
|
||||
state.tShift[nextLine] = pos - state.bMarks[nextLine]
|
||||
|
||||
nextLine += 1
|
||||
continue
|
||||
|
||||
# Case 2: line is not inside the blockquote, and the last line was empty.
|
||||
if lastLineEmpty:
|
||||
break
|
||||
|
||||
# Case 3: another tag found.
|
||||
terminate = False
|
||||
|
||||
for terminatorRule in terminatorRules:
|
||||
if terminatorRule(state, nextLine, endLine, True):
|
||||
terminate = True
|
||||
break
|
||||
|
||||
if terminate:
|
||||
# Quirk to enforce "hard termination mode" for paragraphs;
|
||||
# normally if you call `tokenize(state, startLine, nextLine)`,
|
||||
# paragraphs will look below nextLine for paragraph continuation,
|
||||
# but if blockquote is terminated by another tag, they shouldn't
|
||||
state.lineMax = nextLine
|
||||
|
||||
if state.blkIndent != 0:
|
||||
# state.blkIndent was non-zero, we now set it to zero,
|
||||
# so we need to re-calculate all offsets to appear as
|
||||
# if indent wasn't changed
|
||||
oldBMarks.append(state.bMarks[nextLine])
|
||||
oldBSCount.append(state.bsCount[nextLine])
|
||||
oldTShift.append(state.tShift[nextLine])
|
||||
oldSCount.append(state.sCount[nextLine])
|
||||
state.sCount[nextLine] -= state.blkIndent
|
||||
|
||||
break
|
||||
|
||||
oldBMarks.append(state.bMarks[nextLine])
|
||||
oldBSCount.append(state.bsCount[nextLine])
|
||||
oldTShift.append(state.tShift[nextLine])
|
||||
oldSCount.append(state.sCount[nextLine])
|
||||
|
||||
# A negative indentation means that this is a paragraph continuation
|
||||
#
|
||||
state.sCount[nextLine] = -1
|
||||
|
||||
nextLine += 1
|
||||
|
||||
oldIndent = state.blkIndent
|
||||
state.blkIndent = 0
|
||||
|
||||
token = state.push("blockquote_open", "blockquote", 1)
|
||||
token.markup = ">"
|
||||
token.map = lines = [startLine, 0]
|
||||
|
||||
state.md.block.tokenize(state, startLine, nextLine)
|
||||
|
||||
token = state.push("blockquote_close", "blockquote", -1)
|
||||
token.markup = ">"
|
||||
|
||||
state.lineMax = oldLineMax
|
||||
state.parentType = oldParentType
|
||||
lines[1] = state.line
|
||||
|
||||
# Restore the original tShift; this might not be necessary since the parser
|
||||
# has already been here, but we restore it just to be safe.
|
||||
for i, item in enumerate(oldTShift):
|
||||
state.bMarks[i + startLine] = oldBMarks[i]
|
||||
state.tShift[i + startLine] = item
|
||||
state.sCount[i + startLine] = oldSCount[i]
|
||||
state.bsCount[i + startLine] = oldBSCount[i]
|
||||
|
||||
state.blkIndent = oldIndent
|
||||
|
||||
return True
|
||||
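For reference, a small sketch of the token stream this rule produces:

from markdown_it import MarkdownIt

tokens = MarkdownIt("commonmark").parse("> test")
print([t.type for t in tokens])
# ['blockquote_open', 'paragraph_open', 'inline', 'paragraph_close', 'blockquote_close']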
@@ -0,0 +1,36 @@
|
||||
"""Code block (4 spaces padded)."""
|
||||
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)
|
||||
|
||||
if not state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
last = nextLine = startLine + 1
|
||||
|
||||
while nextLine < endLine:
|
||||
if state.isEmpty(nextLine):
|
||||
nextLine += 1
|
||||
continue
|
||||
|
||||
if state.is_code_block(nextLine):
|
||||
nextLine += 1
|
||||
last = nextLine
|
||||
continue
|
||||
|
||||
break
|
||||
|
||||
state.line = last
|
||||
|
||||
token = state.push("code_block", "code", 0)
|
||||
token.content = state.getLines(startLine, last, 4 + state.blkIndent, False) + "\n"
|
||||
token.map = [startLine, state.line]
|
||||
|
||||
return True
|
||||
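A quick sketch of the rule in action; 4-space indented lines become a single code_block token:

from markdown_it import MarkdownIt

print(MarkdownIt("commonmark").render("    x = 1\n"))
# -> "<pre><code>x = 1\n</code></pre>\n"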
@@ -0,0 +1,101 @@
|
||||
# fences (``` lang, ~~~ lang)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)
|
||||
|
||||
haveEndMarker = False
|
||||
pos = state.bMarks[startLine] + state.tShift[startLine]
|
||||
maximum = state.eMarks[startLine]
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
if pos + 3 > maximum:
|
||||
return False
|
||||
|
||||
marker = state.src[pos]
|
||||
|
||||
if marker not in ("~", "`"):
|
||||
return False
|
||||
|
||||
# scan marker length
|
||||
mem = pos
|
||||
pos = state.skipCharsStr(pos, marker)
|
||||
|
||||
length = pos - mem
|
||||
|
||||
if length < 3:
|
||||
return False
|
||||
|
||||
markup = state.src[mem:pos]
|
||||
params = state.src[pos:maximum]
|
||||
|
||||
if marker == "`" and marker in params:
|
||||
return False
|
||||
|
||||
# Since start is found, we can report success here in validation mode
|
||||
if silent:
|
||||
return True
|
||||
|
||||
# search end of block
|
||||
nextLine = startLine
|
||||
|
||||
while True:
|
||||
nextLine += 1
|
||||
if nextLine >= endLine:
|
||||
# an unclosed block should be autoclosed by the end of the document;
|
||||
# the block is also autoclosed at the end of the parent
|
||||
break
|
||||
|
||||
pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
|
||||
maximum = state.eMarks[nextLine]
|
||||
|
||||
if pos < maximum and state.sCount[nextLine] < state.blkIndent:
|
||||
# non-empty line with negative indent should stop the list:
|
||||
# - ```
|
||||
# test
|
||||
break
|
||||
|
||||
try:
|
||||
if state.src[pos] != marker:
|
||||
continue
|
||||
except IndexError:
|
||||
break
|
||||
|
||||
if state.is_code_block(nextLine):
|
||||
continue
|
||||
|
||||
pos = state.skipCharsStr(pos, marker)
|
||||
|
||||
# closing code fence must be at least as long as the opening one
|
||||
if pos - mem < length:
|
||||
continue
|
||||
|
||||
# make sure tail has spaces only
|
||||
pos = state.skipSpaces(pos)
|
||||
|
||||
if pos < maximum:
|
||||
continue
|
||||
|
||||
haveEndMarker = True
|
||||
# found!
|
||||
break
|
||||
|
||||
# If a fence has leading spaces, they should be removed from its inner block
|
||||
length = state.sCount[startLine]
|
||||
|
||||
state.line = nextLine + (1 if haveEndMarker else 0)
|
||||
|
||||
token = state.push("fence", "code", 0)
|
||||
token.info = params
|
||||
token.content = state.getLines(startLine + 1, nextLine, length, True)
|
||||
token.markup = markup
|
||||
token.map = [startLine, state.line]
|
||||
|
||||
return True
|
||||
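A sketch showing how the info string after the opening marker ends up on the token:

from markdown_it import MarkdownIt

tokens = MarkdownIt("commonmark").parse("```python\nprint(1)\n```")
tok = tokens[0]
assert tok.type == "fence" and tok.info == "python" and tok.markup == "```"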
@@ -0,0 +1,69 @@
|
||||
"""Atex heading (#, ##, ...)"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)
|
||||
|
||||
pos = state.bMarks[startLine] + state.tShift[startLine]
|
||||
maximum = state.eMarks[startLine]
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
ch: str | None = state.src[pos]
|
||||
|
||||
if ch != "#" or pos >= maximum:
|
||||
return False
|
||||
|
||||
# count heading level
|
||||
level = 1
|
||||
pos += 1
|
||||
try:
|
||||
ch = state.src[pos]
|
||||
except IndexError:
|
||||
ch = None
|
||||
while ch == "#" and pos < maximum and level <= 6:
|
||||
level += 1
|
||||
pos += 1
|
||||
try:
|
||||
ch = state.src[pos]
|
||||
except IndexError:
|
||||
ch = None
|
||||
|
||||
if level > 6 or (pos < maximum and not isStrSpace(ch)):
|
||||
return False
|
||||
|
||||
if silent:
|
||||
return True
|
||||
|
||||
# Let's cut tails like ' ### ' from the end of string
|
||||
|
||||
maximum = state.skipSpacesBack(maximum, pos)
|
||||
tmp = state.skipCharsStrBack(maximum, "#", pos)
|
||||
if tmp > pos and isStrSpace(state.src[tmp - 1]):
|
||||
maximum = tmp
|
||||
|
||||
state.line = startLine + 1
|
||||
|
||||
token = state.push("heading_open", "h" + str(level), 1)
|
||||
token.markup = "########"[:level]
|
||||
token.map = [startLine, state.line]
|
||||
|
||||
token = state.push("inline", "", 0)
|
||||
token.content = state.src[pos:maximum].strip()
|
||||
token.map = [startLine, state.line]
|
||||
token.children = []
|
||||
|
||||
token = state.push("heading_close", "h" + str(level), -1)
|
||||
token.markup = "########"[:level]
|
||||
|
||||
return True
|
||||
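A sketch of the three tokens this rule pushes for an ATX heading:

from markdown_it import MarkdownIt

tokens = MarkdownIt("commonmark").parse("## Title")
# heading_open (tag "h2", markup "##"), inline ("Title"), heading_close
assert [t.type for t in tokens] == ["heading_open", "inline", "heading_close"]
assert tokens[0].tag == "h2"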
@@ -0,0 +1,56 @@
|
||||
"""Horizontal rule
|
||||
|
||||
At least 3 of one of these characters on a line: * - _
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)
|
||||
|
||||
pos = state.bMarks[startLine] + state.tShift[startLine]
|
||||
maximum = state.eMarks[startLine]
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
try:
|
||||
marker = state.src[pos]
|
||||
except IndexError:
|
||||
return False
|
||||
pos += 1
|
||||
|
||||
# Check hr marker
|
||||
if marker not in ("*", "-", "_"):
|
||||
return False
|
||||
|
||||
# markers can be mixed with spaces, but there should be at least 3 of them
|
||||
|
||||
cnt = 1
|
||||
while pos < maximum:
|
||||
ch = state.src[pos]
|
||||
pos += 1
|
||||
if ch != marker and not isStrSpace(ch):
|
||||
return False
|
||||
if ch == marker:
|
||||
cnt += 1
|
||||
|
||||
if cnt < 3:
|
||||
return False
|
||||
|
||||
if silent:
|
||||
return True
|
||||
|
||||
state.line = startLine + 1
|
||||
|
||||
token = state.push("hr", "hr", 0)
|
||||
token.map = [startLine, state.line]
|
||||
token.markup = marker * (cnt + 1)
|
||||
|
||||
return True
|
||||
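A quick check of the rule; the commonmark preset sets xhtmlOut, hence the self-closing form:

from markdown_it import MarkdownIt

print(MarkdownIt("commonmark").render("***\n"))  # -> "<hr />\n"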
@@ -0,0 +1,90 @@
|
||||
# HTML block
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ..common.html_blocks import block_names
|
||||
from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# A list of opening and corresponding closing sequences for HTML tags;
|
||||
# the last element defines whether the sequence can terminate a paragraph
|
||||
HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [
|
||||
(
|
||||
re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
|
||||
re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
|
||||
True,
|
||||
),
|
||||
(re.compile(r"^<!--"), re.compile(r"-->"), True),
|
||||
(re.compile(r"^<\?"), re.compile(r"\?>"), True),
|
||||
(re.compile(r"^<![A-Z]"), re.compile(r">"), True),
|
||||
(re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
|
||||
(
|
||||
re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
|
||||
re.compile(r"^$"),
|
||||
True,
|
||||
),
|
||||
(re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
|
||||
]
|
||||
|
||||
|
||||
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug(
|
||||
"entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
|
||||
)
|
||||
pos = state.bMarks[startLine] + state.tShift[startLine]
|
||||
maximum = state.eMarks[startLine]
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
if not state.md.options.get("html", None):
|
||||
return False
|
||||
|
||||
if state.src[pos] != "<":
|
||||
return False
|
||||
|
||||
lineText = state.src[pos:maximum]
|
||||
|
||||
html_seq = None
|
||||
for HTML_SEQUENCE in HTML_SEQUENCES:
|
||||
if HTML_SEQUENCE[0].search(lineText):
|
||||
html_seq = HTML_SEQUENCE
|
||||
break
|
||||
|
||||
if not html_seq:
|
||||
return False
|
||||
|
||||
if silent:
|
||||
# true if this sequence can be a terminator, false otherwise
|
||||
return html_seq[2]
|
||||
|
||||
nextLine = startLine + 1
|
||||
|
||||
# If we are here - we detected HTML block.
|
||||
# Let's roll down till block end.
|
||||
if not html_seq[1].search(lineText):
|
||||
while nextLine < endLine:
|
||||
if state.sCount[nextLine] < state.blkIndent:
|
||||
break
|
||||
|
||||
pos = state.bMarks[nextLine] + state.tShift[nextLine]
|
||||
maximum = state.eMarks[nextLine]
|
||||
lineText = state.src[pos:maximum]
|
||||
|
||||
if html_seq[1].search(lineText):
|
||||
if len(lineText) != 0:
|
||||
nextLine += 1
|
||||
break
|
||||
nextLine += 1
|
||||
|
||||
state.line = nextLine
|
||||
|
||||
token = state.push("html_block", "", 0)
|
||||
token.map = [startLine, nextLine]
|
||||
token.content = state.getLines(startLine, nextLine, state.blkIndent, True)
|
||||
|
||||
return True
|
||||
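A sketch of the html option gating above. The config name "js-default", whose preset disables HTML, is assumed here:

from markdown_it import MarkdownIt

src = "<div>hi</div>"
MarkdownIt("commonmark").render(src)  # html=True: passed through as an html_block
MarkdownIt("js-default").render(src)  # html=False: escaped inside a paragraph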
@@ -0,0 +1,86 @@
|
||||
# lheading (---, ==)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)
|
||||
|
||||
level = None
|
||||
nextLine = startLine + 1
|
||||
ruler = state.md.block.ruler
|
||||
terminatorRules = ruler.getRules("paragraph")
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
oldParentType = state.parentType
|
||||
state.parentType = "paragraph" # use paragraph to match terminatorRules
|
||||
|
||||
# jump line-by-line until empty one or EOF
|
||||
while nextLine < endLine and not state.isEmpty(nextLine):
|
||||
# this would be a code block normally, but after paragraph
|
||||
# it's considered a lazy continuation regardless of what's there
|
||||
if state.sCount[nextLine] - state.blkIndent > 3:
|
||||
nextLine += 1
|
||||
continue
|
||||
|
||||
# Check for underline in setext header
|
||||
if state.sCount[nextLine] >= state.blkIndent:
|
||||
pos = state.bMarks[nextLine] + state.tShift[nextLine]
|
||||
maximum = state.eMarks[nextLine]
|
||||
|
||||
if pos < maximum:
|
||||
marker = state.src[pos]
|
||||
|
||||
if marker in ("-", "="):
|
||||
pos = state.skipCharsStr(pos, marker)
|
||||
pos = state.skipSpaces(pos)
|
||||
|
||||
# /* = */
|
||||
if pos >= maximum:
|
||||
level = 1 if marker == "=" else 2
|
||||
break
|
||||
|
||||
# quirk for blockquotes, this line should already be checked by that rule
|
||||
if state.sCount[nextLine] < 0:
|
||||
nextLine += 1
|
||||
continue
|
||||
|
||||
# Some tags can terminate paragraph without empty line.
|
||||
terminate = False
|
||||
for terminatorRule in terminatorRules:
|
||||
if terminatorRule(state, nextLine, endLine, True):
|
||||
terminate = True
|
||||
break
|
||||
if terminate:
|
||||
break
|
||||
|
||||
nextLine += 1
|
||||
|
||||
if not level:
|
||||
# Didn't find valid underline
|
||||
return False
|
||||
|
||||
content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
|
||||
|
||||
state.line = nextLine + 1
|
||||
|
||||
token = state.push("heading_open", "h" + str(level), 1)
|
||||
token.markup = marker
|
||||
token.map = [startLine, state.line]
|
||||
|
||||
token = state.push("inline", "", 0)
|
||||
token.content = content
|
||||
token.map = [startLine, state.line - 1]
|
||||
token.children = []
|
||||
|
||||
token = state.push("heading_close", "h" + str(level), -1)
|
||||
token.markup = marker
|
||||
|
||||
state.parentType = oldParentType
|
||||
|
||||
return True
|
||||
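A sketch of a setext heading hitting this rule:

from markdown_it import MarkdownIt

tokens = MarkdownIt("commonmark").parse("Title\n=====\n")
assert tokens[0].type == "heading_open"
assert tokens[0].tag == "h1" and tokens[0].markup == "="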
@@ -0,0 +1,345 @@
|
||||
# Lists
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Search `[-+*][\n ]`, returns next pos after marker on success
|
||||
# or -1 on fail.
|
||||
def skipBulletListMarker(state: StateBlock, startLine: int) -> int:
|
||||
pos = state.bMarks[startLine] + state.tShift[startLine]
|
||||
maximum = state.eMarks[startLine]
|
||||
|
||||
try:
|
||||
marker = state.src[pos]
|
||||
except IndexError:
|
||||
return -1
|
||||
pos += 1
|
||||
|
||||
if marker not in ("*", "-", "+"):
|
||||
return -1
|
||||
|
||||
if pos < maximum:
|
||||
ch = state.src[pos]
|
||||
|
||||
if not isStrSpace(ch):
|
||||
# " -test " - is not a list item
|
||||
return -1
|
||||
|
||||
return pos
|
||||
|
||||
|
||||
# Search `\d+[.)][\n ]`, returns next pos after marker on success
|
||||
# or -1 on fail.
|
||||
def skipOrderedListMarker(state: StateBlock, startLine: int) -> int:
|
||||
start = state.bMarks[startLine] + state.tShift[startLine]
|
||||
pos = start
|
||||
maximum = state.eMarks[startLine]
|
||||
|
||||
# List marker should have at least 2 chars (digit + dot)
|
||||
if pos + 1 >= maximum:
|
||||
return -1
|
||||
|
||||
ch = state.src[pos]
|
||||
pos += 1
|
||||
|
||||
ch_ord = ord(ch)
|
||||
# /* 0 */ /* 9 */
|
||||
if ch_ord < 0x30 or ch_ord > 0x39:
|
||||
return -1
|
||||
|
||||
while True:
|
||||
# EOL -> fail
|
||||
if pos >= maximum:
|
||||
return -1
|
||||
|
||||
ch = state.src[pos]
|
||||
pos += 1
|
||||
|
||||
# /* 0 */ /* 9 */
|
||||
ch_ord = ord(ch)
|
||||
if ch_ord >= 0x30 and ch_ord <= 0x39:
|
||||
# List marker should have no more than 9 digits
|
||||
# (prevents integer overflow in browsers)
|
||||
if pos - start >= 10:
|
||||
return -1
|
||||
|
||||
continue
|
||||
|
||||
# found valid marker
|
||||
if ch in (")", "."):
|
||||
break
|
||||
|
||||
return -1
|
||||
|
||||
if pos < maximum:
|
||||
ch = state.src[pos]
|
||||
|
||||
if not isStrSpace(ch):
|
||||
# " 1.test " - is not a list item
|
||||
return -1
|
||||
|
||||
return pos
|
||||
|
||||
|
||||
def markTightParagraphs(state: StateBlock, idx: int) -> None:
|
||||
level = state.level + 2
|
||||
|
||||
i = idx + 2
|
||||
length = len(state.tokens) - 2
|
||||
while i < length:
|
||||
if state.tokens[i].level == level and state.tokens[i].type == "paragraph_open":
|
||||
state.tokens[i + 2].hidden = True
|
||||
state.tokens[i].hidden = True
|
||||
i += 2
|
||||
i += 1
|
||||
|
||||
|
||||
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
|
||||
LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)
|
||||
|
||||
isTerminatingParagraph = False
|
||||
tight = True
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
return False
|
||||
|
||||
# Special case:
|
||||
# - item 1
|
||||
# - item 2
|
||||
# - item 3
|
||||
# - item 4
|
||||
# - this one is a paragraph continuation
|
||||
if (
|
||||
state.listIndent >= 0
|
||||
and state.sCount[startLine] - state.listIndent >= 4
|
||||
and state.sCount[startLine] < state.blkIndent
|
||||
):
|
||||
return False
|
||||
|
||||
# limit conditions when list can interrupt
|
||||
# a paragraph (validation mode only)
|
||||
# Next list item should still terminate previous list item
|
||||
#
|
||||
# This code can fail if plugins use blkIndent as well as lists,
|
||||
# but I hope the spec gets fixed long before that happens.
|
||||
#
|
||||
if (
|
||||
silent
|
||||
and state.parentType == "paragraph"
|
||||
and state.sCount[startLine] >= state.blkIndent
|
||||
):
|
||||
isTerminatingParagraph = True
|
||||
|
||||
# Detect list type and position after marker
|
||||
posAfterMarker = skipOrderedListMarker(state, startLine)
|
||||
if posAfterMarker >= 0:
|
||||
isOrdered = True
|
||||
start = state.bMarks[startLine] + state.tShift[startLine]
|
||||
markerValue = int(state.src[start : posAfterMarker - 1])
|
||||
|
||||
# If we're starting a new ordered list right after
|
||||
# a paragraph, it should start with 1.
|
||||
if isTerminatingParagraph and markerValue != 1:
|
||||
return False
|
||||
else:
|
||||
posAfterMarker = skipBulletListMarker(state, startLine)
|
||||
if posAfterMarker >= 0:
|
||||
isOrdered = False
|
||||
else:
|
||||
return False
|
||||
|
||||
# If we're starting a new unordered list right after
|
||||
# a paragraph, first line should not be empty.
|
||||
if (
|
||||
isTerminatingParagraph
|
||||
and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
|
||||
):
|
||||
return False
|
||||
|
||||
# We should terminate list on style change. Remember first one to compare.
|
||||
markerChar = state.src[posAfterMarker - 1]
|
||||
|
||||
# For validation mode we can terminate immediately
|
||||
if silent:
|
||||
return True
|
||||
|
||||
# Start list
|
||||
listTokIdx = len(state.tokens)
|
||||
|
||||
if isOrdered:
|
||||
token = state.push("ordered_list_open", "ol", 1)
|
||||
if markerValue != 1:
|
||||
token.attrs = {"start": markerValue}
|
||||
|
||||
else:
|
||||
token = state.push("bullet_list_open", "ul", 1)
|
||||
|
||||
token.map = listLines = [startLine, 0]
|
||||
token.markup = markerChar
|
||||
|
||||
#
|
||||
# Iterate list items
|
||||
#
|
||||
|
||||
nextLine = startLine
|
||||
prevEmptyEnd = False
|
||||
terminatorRules = state.md.block.ruler.getRules("list")
|
||||
|
||||
oldParentType = state.parentType
|
||||
state.parentType = "list"
|
||||
|
||||
while nextLine < endLine:
|
||||
pos = posAfterMarker
|
||||
maximum = state.eMarks[nextLine]
|
||||
|
||||
initial = offset = (
|
||||
state.sCount[nextLine]
|
||||
+ posAfterMarker
|
||||
- (state.bMarks[startLine] + state.tShift[startLine])
|
||||
)
|
||||
|
||||
while pos < maximum:
|
||||
ch = state.src[pos]
|
||||
|
||||
if ch == "\t":
|
||||
offset += 4 - (offset + state.bsCount[nextLine]) % 4
|
||||
elif ch == " ":
|
||||
offset += 1
|
||||
else:
|
||||
break
|
||||
|
||||
pos += 1
|
||||
|
||||
contentStart = pos
|
||||
|
||||
# trimming space in "- \n 3" case, indent is 1 here
|
||||
indentAfterMarker = 1 if contentStart >= maximum else offset - initial
|
||||
|
||||
# If we have more than 4 spaces, the indent is 1
|
||||
# (the rest is just indented code block)
|
||||
if indentAfterMarker > 4:
|
||||
indentAfterMarker = 1
|
||||
|
||||
# " - test"
|
||||
# ^^^^^ - calculating total length of this thing
|
||||
indent = initial + indentAfterMarker
|
||||
|
||||
# Run subparser & write tokens
|
||||
token = state.push("list_item_open", "li", 1)
|
||||
token.markup = markerChar
|
||||
token.map = itemLines = [startLine, 0]
|
||||
if isOrdered:
|
||||
token.info = state.src[start : posAfterMarker - 1]
|
||||
|
||||
# change current state, then restore it after parser subcall
|
||||
oldTight = state.tight
|
||||
oldTShift = state.tShift[startLine]
|
||||
oldSCount = state.sCount[startLine]
|
||||
|
||||
# - example list
|
||||
# ^ listIndent position will be here
|
||||
# ^ blkIndent position will be here
|
||||
#
|
||||
oldListIndent = state.listIndent
|
||||
state.listIndent = state.blkIndent
|
||||
state.blkIndent = indent
|
||||
|
||||
state.tight = True
|
||||
state.tShift[startLine] = contentStart - state.bMarks[startLine]
|
||||
state.sCount[startLine] = offset
|
||||
|
||||
if contentStart >= maximum and state.isEmpty(startLine + 1):
|
||||
# workaround for this case
|
||||
# (list item is empty, list terminates before "foo"):
|
||||
# ~~~~~~~~
|
||||
# -
|
||||
#
|
||||
# foo
|
||||
# ~~~~~~~~
|
||||
state.line = min(state.line + 2, endLine)
|
||||
else:
|
||||
# NOTE in list.js this was:
|
||||
# state.md.block.tokenize(state, startLine, endLine, True)
|
||||
# but tokenize does not take the final parameter
|
||||
state.md.block.tokenize(state, startLine, endLine)
|
||||
|
||||
# If any list item is loose, mark the whole list as loose
|
||||
if (not state.tight) or prevEmptyEnd:
|
||||
tight = False
|
||||
|
||||
# An item becomes loose if it finishes with an empty line,
|
||||
# but we should filter the last element, because it means the list finished
|
||||
prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)
|
||||
|
||||
state.blkIndent = state.listIndent
|
||||
state.listIndent = oldListIndent
|
||||
state.tShift[startLine] = oldTShift
|
||||
state.sCount[startLine] = oldSCount
|
||||
state.tight = oldTight
|
||||
|
||||
token = state.push("list_item_close", "li", -1)
|
||||
token.markup = markerChar
|
||||
|
||||
nextLine = startLine = state.line
|
||||
itemLines[1] = nextLine
|
||||
|
||||
if nextLine >= endLine:
|
||||
break
|
||||
|
||||
contentStart = state.bMarks[startLine]
|
||||
|
||||
#
|
||||
# Try to check if list is terminated or continued.
|
||||
#
|
||||
if state.sCount[nextLine] < state.blkIndent:
|
||||
break
|
||||
|
||||
if state.is_code_block(startLine):
|
||||
break
|
||||
|
||||
# fail if terminating block found
|
||||
terminate = False
|
||||
for terminatorRule in terminatorRules:
|
||||
if terminatorRule(state, nextLine, endLine, True):
|
||||
terminate = True
|
||||
break
|
||||
|
||||
if terminate:
|
||||
break
|
||||
|
||||
# fail if list has another type
|
||||
if isOrdered:
|
||||
posAfterMarker = skipOrderedListMarker(state, nextLine)
|
||||
if posAfterMarker < 0:
|
||||
break
|
||||
start = state.bMarks[nextLine] + state.tShift[nextLine]
|
||||
else:
|
||||
posAfterMarker = skipBulletListMarker(state, nextLine)
|
||||
if posAfterMarker < 0:
|
||||
break
|
||||
|
||||
if markerChar != state.src[posAfterMarker - 1]:
|
||||
break
|
||||
|
||||
# Finalize list
|
||||
if isOrdered:
|
||||
token = state.push("ordered_list_close", "ol", -1)
|
||||
else:
|
||||
token = state.push("bullet_list_close", "ul", -1)
|
||||
|
||||
token.markup = markerChar
|
||||
|
||||
listLines[1] = nextLine
|
||||
state.line = nextLine
|
||||
|
||||
state.parentType = oldParentType
|
||||
|
||||
# mark paragraphs tight if needed
|
||||
if tight:
|
||||
markTightParagraphs(state, listTokIdx)
|
||||
|
||||
return True
|
||||
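A sketch of the tight-list marking done by markTightParagraphs; hidden paragraph tokens are skipped by the renderer:

from markdown_it import MarkdownIt

tokens = MarkdownIt("commonmark").parse("- a\n- b\n")
hidden = [t.type for t in tokens if t.hidden]
# the inner paragraph_open/paragraph_close pairs are marked hidden,
# so the list renders as <li>a</li> rather than <li><p>a</p></li>
assert "paragraph_open" in hidden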
@@ -0,0 +1,66 @@
|
||||
"""Paragraph."""
|
||||
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    nextLine = startLine + 1
    ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")
    endLine = state.lineMax

    oldParentType = state.parentType
    state.parentType = "paragraph"

    # jump line-by-line until empty one or EOF
    while nextLine < endLine:
        if state.isEmpty(nextLine):
            break
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        nextLine += 1

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine

    token = state.push("paragraph_open", "p", 1)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("paragraph_close", "p", -1)

    state.parentType = oldParentType

    return True
@@ -0,0 +1,235 @@
import logging

from ..common.utils import charCodeAt, isSpace, normalizeReference
from .state_block import StateBlock

LOGGER = logging.getLogger(__name__)
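

# Parses a link reference definition occupying its own block, e.g.:
#
#     [label]: https://example.com "Optional Title"
#
# On success the destination and title are stored in env["references"]
# under the normalized label, for later resolution by link/image rules.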
def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1

    if state.is_code_block(startLine):
        return False

    if state.src[pos] != "[":
        return False

    string = state.src[pos : maximum + 1]

    # string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)

    labelEnd = None
    pos = 1
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            if (lineContent := getNextLine(state, nextLine)) is not None:
                string += lineContent
                maximum = len(string)
                nextLine += 1
        elif ch == 0x5C:  # /* \ */
            pos += 1
            if (
                pos < maximum
                and charCodeAt(string, pos) == 0x0A
                and (lineContent := getNextLine(state, nextLine)) is not None
            ):
                string += lineContent
                maximum = len(string)
                nextLine += 1
        pos += 1

    if (
        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
    ):  # /* : */
        return False

    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            if (lineContent := getNextLine(state, nextLine)) is not None:
                string += lineContent
                maximum = len(string)
                nextLine += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    destRes = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not destRes.ok:
        return False

    href = state.md.normalizeLink(destRes.str)
    if not state.md.validateLink(href):
        return False

    pos = destRes.pos

    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = nextLine

    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            if (lineContent := getNextLine(state, nextLine)) is not None:
                string += lineContent
                maximum = len(string)
                nextLine += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #                          ^^^^^^^ parse this
    titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, None)
    while titleRes.can_continue:
        if (lineContent := getNextLine(state, nextLine)) is None:
            break
        string += lineContent
        pos = maximum
        maximum = len(string)
        nextLine += 1
        titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, titleRes)

    if pos < maximum and start != pos and titleRes.ok:
        title = titleRes.str
        pos = titleRes.pos
    else:
        title = ""
        pos = destEndPos
        nextLine = destEndLineNo

    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A and title:
        # garbage at the end of the line after title,
        # but it could still be a valid reference if we roll back
        title = ""
        pos = destEndPos
        nextLine = destEndLineNo
        while pos < maximum:
            ch = charCodeAt(string, pos)
            if not isSpace(ch):
                break
            pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False

    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False

    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True

    if "references" not in state.env:
        state.env["references"] = {}

    state.line = nextLine

    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]

    if label not in state.env["references"]:
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )

    return True


def getNextLine(state: StateBlock, nextLine: int) -> None | str:
    endLine = state.lineMax

    if nextLine >= endLine or state.isEmpty(nextLine):
        # empty line or end of input
        return None

    isContinuation = False

    # this would be a code block normally, but after paragraph
    # it's considered a lazy continuation regardless of what's there
    if state.is_code_block(nextLine):
        isContinuation = True

    # quirk for blockquotes, this line should already be checked by that rule
    if state.sCount[nextLine] < 0:
        isContinuation = True

    if not isContinuation:
        terminatorRules = state.md.block.ruler.getRules("reference")
        oldParentType = state.parentType
        state.parentType = "reference"

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        state.parentType = oldParentType

        if terminate:
            # terminated by another block
            return None

    pos = state.bMarks[nextLine] + state.tShift[nextLine]
    maximum = state.eMarks[nextLine]

    # max + 1 explicitly includes the newline
    return state.src[pos : maximum + 1]
@@ -0,0 +1,261 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Literal

from ..common.utils import isStrSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType

if TYPE_CHECKING:
    from markdown_it.main import MarkdownIt


class StateBlock(StateBase):
    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> None:
        self.src = src

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks: list[int] = []  # line begin offsets for fast jumps
        self.eMarks: list[int] = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift: list[int] = []
        self.sCount: list[int] = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount: list[int] = []

        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        indent_found = False

        start = pos = indent = offset = 0
        length = len(self.src)

        for pos, character in enumerate(self.src):
            if not indent_found:
                if isStrSpace(character):
                    indent += 1

                    if character == "\t":
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                    continue
                else:
                    indent_found = True

            if character == "\n" or pos == length - 1:
                if character != "\n":
                    pos += 1
                self.bMarks.append(start)
                self.eMarks.append(pos)
                self.tShift.append(indent)
                self.sCount.append(offset)
                self.bsCount.append(0)

                indent_found = False
                indent = 0
                offset = 0
                start = pos + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

        # pre-check if code blocks are enabled, to speed up is_code_block method
        self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}"
            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        )

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Push new token to "stream"."""
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """."""
        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """."""
        while from_pos < self.lineMax:
            try:
                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
                    from_pos
                ]:
                    break
            except IndexError:
                pass
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                break
            if not isStrSpace(current):
                break
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces from given position in reverse."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if not isStrSpace(self.src[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip character code from given position."""
        while True:
            try:
                current = self.srcCharCode[pos]
            except IndexError:
                break
            if current != code:
                break
            pos += 1
        return pos

    def skipCharsStr(self, pos: int, ch: str) -> int:
        """Skip character string from given position."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                break
            if current != ch:
                break
            pos += 1
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Skip character code reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if code != self.srcCharCode[pos]:
                return pos + 1
        return pos

    def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
        """Skip character string reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if ch != self.src[pos]:
                return pos + 1
        return pos
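
    # getLines strips up to ``indent`` columns of leading whitespace from each
    # line (expanding tabs) before joining, e.g. a list item body indented by
    # its marker width comes back flush-left.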
    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut lines range from source."""
        line = begin
        if begin >= end:
            return ""

        queue = [""] * (end - begin)

        i = 1
        while line < end:
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            last = (
                self.eMarks[line] + 1
                if line + 1 < end or keepLastLF
                else self.eMarks[line]
            )

            while (first < last) and (lineIndent < indent):
                ch = self.src[first]
                if isStrSpace(ch):
                    if ch == "\t":
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    lineIndent += 1
                else:
                    break
                first += 1

            if lineIndent > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last]
            else:
                queue[i - 1] = self.src[first:last]

            line += 1
            i += 1

        return "".join(queue)

    def is_code_block(self, line: int) -> bool:
        """Check if line is a code block,
        i.e. the code block rule is enabled and text is indented by more than 3 spaces.
        """
        return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4
@@ -0,0 +1,250 @@
# GFM table, https://github.github.com/gfm/#tables-extension-
from __future__ import annotations

import re

from ..common.utils import charStrAt, isStrSpace
from .state_block import StateBlock

headerLineRe = re.compile(r"^:?-+:?$")
enclosingPipesRe = re.compile(r"^\||\|$")

# Limit the amount of empty autocompleted cells in a table,
# see https://github.com/markdown-it/markdown-it/issues/1000,
# Both pulldown-cmark and commonmark-hs limit the number of cells this way to ~200k.
# We set it to 65k, which can expand user input by a factor of x370
# (256x256 square is 1.8kB expanded into 650kB).
MAX_AUTOCOMPLETED_CELLS = 0x10000


def getLine(state: StateBlock, line: int) -> str:
    pos = state.bMarks[line] + state.tShift[line]
    maximum = state.eMarks[line]

    # return state.src.substr(pos, max - pos)
    return state.src[pos:maximum]
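

# Split a table row on unescaped pipes; "\|" keeps a literal pipe inside a
# cell. For example, escapedSplit("a|b\\|c") returns ["a", "b|c"].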
def escapedSplit(string: str) -> list[str]:
    result: list[str] = []
    pos = 0
    max = len(string)
    isEscaped = False
    lastPos = 0
    current = ""
    ch = charStrAt(string, pos)

    while pos < max:
        if ch == "|":
            if not isEscaped:
                # pipe separating cells, '|'
                result.append(current + string[lastPos:pos])
                current = ""
                lastPos = pos + 1
            else:
                # escaped pipe, '\|'
                current += string[lastPos : pos - 1]
                lastPos = pos

        isEscaped = ch == "\\"
        pos += 1

        ch = charStrAt(string, pos)

    result.append(current + string[lastPos:])

    return result


def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    tbodyLines = None

    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    nextLine = startLine + 1

    if state.sCount[nextLine] < state.blkIndent:
        return False

    if state.is_code_block(nextLine):
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp

    pos = state.bMarks[nextLine] + state.tShift[nextLine]
    if pos >= state.eMarks[nextLine]:
        return False
    first_ch = state.src[pos]
    pos += 1
    if first_ch not in ("|", "-", ":"):
        return False

    if pos >= state.eMarks[nextLine]:
        return False
    second_ch = state.src[pos]
    pos += 1
    if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch):
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == "-" and isStrSpace(second_ch):
        return False

    while pos < state.eMarks[nextLine]:
        ch = state.src[pos]

        if ch not in ("|", "-", ":") and not isStrSpace(ch):
            return False

        pos += 1

    lineText = getLine(state, startLine + 1)

    columns = lineText.split("|")
    aligns = []
    for i in range(len(columns)):
        t = columns[i].strip()
        if not t:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if i == 0 or i == len(columns) - 1:
                continue
            else:
                return False

        if not headerLineRe.search(t):
            return False
        if charStrAt(t, len(t) - 1) == ":":
            aligns.append("center" if charStrAt(t, 0) == ":" else "right")
        elif charStrAt(t, 0) == ":":
            aligns.append("left")
        else:
            aligns.append("")

    lineText = getLine(state, startLine).strip()
    if "|" not in lineText:
        return False
    if state.is_code_block(startLine):
        return False
    columns = escapedSplit(lineText)
    if columns and columns[0] == "":
        columns.pop(0)
    if columns and columns[-1] == "":
        columns.pop()

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(columns)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    token = state.push("table_open", "table", 1)
    token.map = tableLines = [startLine, 0]

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for i in range(len(columns)):
        token = state.push("th_open", "th", 1)
        if aligns[i]:
            token.attrs = {"style": "text-align:" + aligns[i]}

        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = columns[i].strip()
        token.children = []

        token = state.push("th_close", "th", -1)

    token = state.push("tr_close", "tr", -1)
    token = state.push("thead_close", "thead", -1)

    autocompleted_cells = 0
    nextLine = startLine + 2
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        terminate = False
        for i in range(len(terminatorRules)):
            if terminatorRules[i](state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break
        lineText = getLine(state, nextLine).strip()
        if not lineText:
            break
        if state.is_code_block(nextLine):
            break
        columns = escapedSplit(lineText)
        if columns and columns[0] == "":
            columns.pop(0)
        if columns and columns[-1] == "":
            columns.pop()

        # note: autocomplete count can be negative if user specifies more columns than header,
        # but that does not affect intended use (which is limiting expansion)
        autocompleted_cells += columnCount - len(columns)
        if autocompleted_cells > MAX_AUTOCOMPLETED_CELLS:
            break

        if nextLine == startLine + 2:
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines = [startLine + 2, 0]

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        for i in range(columnCount):
            token = state.push("td_open", "td", 1)
            if aligns[i]:
                token.attrs = {"style": "text-align:" + aligns[i]}

            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            try:
                token.content = columns[i].strip() if columns[i] else ""
            except IndexError:
                token.content = ""
            token.children = []

            token = state.push("td_close", "td", -1)

        token = state.push("tr_close", "tr", -1)

        nextLine += 1

    if tbodyLines:
        token = state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    token = state.push("table_close", "table", -1)

    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True
@@ -0,0 +1,19 @@
__all__ = (
    "StateCore",
    "block",
    "inline",
    "linkify",
    "normalize",
    "replace",
    "smartquotes",
    "text_join",
)

from .block import block
from .inline import inline
from .linkify import linkify
from .normalize import normalize
from .replacements import replace
from .smartquotes import smartquotes
from .state_core import StateCore
from .text_join import text_join
@@ -0,0 +1,13 @@
from ..token import Token
from .state_core import StateCore


def block(state: StateCore) -> None:
    if state.inlineMode:
        token = Token("inline", "", 0)
        token.content = state.src
        token.map = [0, 1]
        token.children = []
        state.tokens.append(token)
    else:
        state.md.block.parse(state.src, state.md, state.env, state.tokens)
@@ -0,0 +1,10 @@
from .state_core import StateCore


def inline(state: StateCore) -> None:
    """Parse inlines"""
    for token in state.tokens:
        if token.type == "inline":
            if token.children is None:
                token.children = []
            state.md.inline.parse(token.content, state.md, state.env, token.children)
@@ -0,0 +1,149 @@
from __future__ import annotations

import re
from typing import Protocol

from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen
from ..token import Token
from .state_core import StateCore

HTTP_RE = re.compile(r"^http://")
MAILTO_RE = re.compile(r"^mailto:")
TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
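

# With options.linkify enabled, bare URLs and domains inside text tokens are
# wrapped in link tokens, e.g. "see example.com" becomes
# text("see ") + link_open + text("example.com") + link_close.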
def linkify(state: StateCore) -> None:
    """Rule for identifying plain-text links."""
    if not state.md.options.linkify:
        return

    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")

    for inline_token in state.tokens:
        if inline_token.type != "inline" or not state.md.linkify.pretest(
            inline_token.content
        ):
            continue

        tokens = inline_token.children

        htmlLinkLevel = 0

        # We scan from the end, to keep position when new tags added.
        # Use reversed logic in links start/end match
        assert tokens is not None
        i = len(tokens)
        while i >= 1:
            i -= 1
            assert isinstance(tokens, list)
            currentToken = tokens[i]

            # Skip content of markdown links
            if currentToken.type == "link_close":
                i -= 1
                while (
                    tokens[i].level != currentToken.level
                    and tokens[i].type != "link_open"
                ):
                    i -= 1
                continue

            # Skip content of html tag links
            if currentToken.type == "html_inline":
                if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
                    htmlLinkLevel -= 1
                if isLinkClose(currentToken.content):
                    htmlLinkLevel += 1
            if htmlLinkLevel > 0:
                continue

            if currentToken.type == "text" and state.md.linkify.test(
                currentToken.content
            ):
                text = currentToken.content
                links: list[_LinkType] = state.md.linkify.match(text) or []

                # Now split string to nodes
                nodes = []
                level = currentToken.level
                lastPos = 0

                # forbid escape sequence at the start of the string,
                # this avoids http\://example.com/ from being linkified as
                # http:<a href="//example.com/">//example.com/</a>
                if (
                    links
                    and links[0].index == 0
                    and i > 0
                    and tokens[i - 1].type == "text_special"
                ):
                    links = links[1:]

                for link in links:
                    url = link.url
                    fullUrl = state.md.normalizeLink(url)
                    if not state.md.validateLink(fullUrl):
                        continue

                    urlText = link.text

                    # Linkifier might send raw hostnames like "example.com", where url
                    # starts with domain name. So we prepend http:// in those cases,
                    # and remove it afterwards.
                    if not link.schema:
                        urlText = HTTP_RE.sub(
                            "", state.md.normalizeLinkText("http://" + urlText)
                        )
                    elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText):
                        urlText = MAILTO_RE.sub(
                            "", state.md.normalizeLinkText("mailto:" + urlText)
                        )
                    else:
                        urlText = state.md.normalizeLinkText(urlText)

                    pos = link.index

                    if pos > lastPos:
                        token = Token("text", "", 0)
                        token.content = text[lastPos:pos]
                        token.level = level
                        nodes.append(token)

                    token = Token("link_open", "a", 1)
                    token.attrs = {"href": fullUrl}
                    token.level = level
                    level += 1
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    token = Token("text", "", 0)
                    token.content = urlText
                    token.level = level
                    nodes.append(token)

                    token = Token("link_close", "a", -1)
                    level -= 1
                    token.level = level
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    lastPos = link.last_index

                if lastPos < len(text):
                    token = Token("text", "", 0)
                    token.content = text[lastPos:]
                    token.level = level
                    nodes.append(token)

                inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes)


class _LinkType(Protocol):
    url: str
    text: str
    index: int
    last_index: int
    schema: str | None
@@ -0,0 +1,19 @@
"""Normalize input string."""

import re

from .state_core import StateCore

# https://spec.commonmark.org/0.29/#line-ending
NEWLINES_RE = re.compile(r"\r\n?|\n")
NULL_RE = re.compile(r"\0")
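

# For example, "a\r\nb\0c" normalizes to "a\nb\ufffdc": CRLF and lone CR
# become LF, and NUL becomes U+FFFD, per the CommonMark spec.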
def normalize(state: StateCore) -> None:
    # Normalize newlines
    string = NEWLINES_RE.sub("\n", state.src)

    # Replace NULL characters
    string = NULL_RE.sub("\ufffd", string)

    state.src = string
@@ -0,0 +1,127 @@
"""Simple typographic replacements

* ``(c)``, ``(C)`` → ©
* ``(tm)``, ``(TM)`` → ™
* ``(r)``, ``(R)`` → ®
* ``+-`` → ±
* ``...`` → …
* ``?....`` → ?..
* ``!....`` → !..
* ``????????`` → ???
* ``!!!!!`` → !!!
* ``,,,`` → ,
* ``--`` → &ndash
* ``---`` → &mdash
"""

from __future__ import annotations

import logging
import re

from ..token import Token
from .state_core import StateCore

LOGGER = logging.getLogger(__name__)

# TODO:
# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
# - multiplication 2 x 4 -> 2 × 4

RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")

# Workaround for phantomjs - need regex without /g flag,
# or root check will fail every second time
# SCOPED_ABBR_TEST_RE = r"\((c|tm|r)\)"

SCOPED_ABBR_RE = re.compile(r"\((c|tm|r)\)", flags=re.IGNORECASE)

PLUS_MINUS_RE = re.compile(r"\+-")

ELLIPSIS_RE = re.compile(r"\.{2,}")

ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")

QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")

COMMA_RE = re.compile(r",{2,}")

EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)

EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)

EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)


SCOPED_ABBR = {"c": "©", "r": "®", "tm": "™"}


def replaceFn(match: re.Match[str]) -> str:
    return SCOPED_ABBR[match.group(1).lower()]


def replace_scoped(inlineTokens: list[Token]) -> None:
    inside_autolink = 0

    for token in inlineTokens:
        if token.type == "text" and not inside_autolink:
            token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content)

        if token.type == "link_open" and token.info == "auto":
            inside_autolink -= 1

        if token.type == "link_close" and token.info == "auto":
            inside_autolink += 1
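

# With the typographer enabled, e.g. "foo -- bar --- baz..." becomes
# "foo \u2013 bar \u2014 baz\u2026" (en dash, em dash, ellipsis).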
def replace_rare(inlineTokens: list[Token]) -> None:
    inside_autolink = 0

    for token in inlineTokens:
        if (
            token.type == "text"
            and (not inside_autolink)
            and RARE_RE.search(token.content)
        ):
            # +- -> ±
            token.content = PLUS_MINUS_RE.sub("±", token.content)

            # .., ..., ....... -> …
            token.content = ELLIPSIS_RE.sub("…", token.content)

            # but ?..... & !..... -> ?.. & !..
            token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub("\\1..", token.content)
            token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content)

            # ,, ,,, ,,,, -> ,
            token.content = COMMA_RE.sub(",", token.content)

            # em-dash
            token.content = EM_DASH_RE.sub("\\1\u2014", token.content)

            # en-dash
            token.content = EN_DASH_RE.sub("\\1\u2013", token.content)
            token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content)

        if token.type == "link_open" and token.info == "auto":
            inside_autolink -= 1

        if token.type == "link_close" and token.info == "auto":
            inside_autolink += 1


def replace(state: StateCore) -> None:
    if not state.md.options.typographer:
        return

    for token in state.tokens:
        if token.type != "inline":
            continue
        if token.children is None:
            continue

        if SCOPED_ABBR_RE.search(token.content):
            replace_scoped(token.children)

        if RARE_RE.search(token.content):
            replace_rare(token.children)
@@ -0,0 +1,202 @@
"""Convert straight quotation marks to typographic ones"""

from __future__ import annotations

import re
from typing import Any

from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..token import Token
from .state_core import StateCore

QUOTE_TEST_RE = re.compile(r"['\"]")
QUOTE_RE = re.compile(r"['\"]")
APOSTROPHE = "\u2019"  # ’


def replaceAt(string: str, index: int, ch: str) -> str:
    # When the index is negative, the behavior is different from the js version.
    # But basically, the index will not be negative.
    assert index >= 0
    return string[:index] + ch + string[index + 1 :]
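

# With the default ``quotes`` option, '"double"' becomes “double” and
# "'single'" becomes ‘single’; an apostrophe as in "don't" is replaced
# with ’ by the mid-word rule below.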
def process_inlines(tokens: list[Token], state: StateCore) -> None:
    stack: list[dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        j = 0
        for j in range(len(stack))[::-1]:
            if stack[j]["level"] <= thisLevel:
                break
        else:
            # When the loop is terminated without a "break".
            # Subtract 1 to get the same index as the js version.
            j -= 1

        stack = stack[: j + 1]

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            goto_outer = False
            lastIndex = pos
            t = QUOTE_RE.search(text[lastIndex:])
            if not t:
                break

            canOpen = canClose = True
            pos = t.start(0) + lastIndex + 1
            isSingle = t.group(0) == "'"

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar: None | int = 0x20

            if t.start(0) + lastIndex - 1 >= 0:
                lastChar = charCodeAt(text, t.start(0) + lastIndex - 1)
            else:
                for j in range(i)[::-1]:
                    if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
                    break

            # Find next character,
            # default to space if it's the end of the line
            nextChar: None | int = 0x20

            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                for j in range(i + 1, len(tokens)):
                    # nextChar defaults to 0x20
                    if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    nextChar = charCodeAt(tokens[j].content, 0)
                    break

            isLastPunctChar = lastChar is not None and (
                isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            )
            isNextPunctChar = nextChar is not None and (
                isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
            )

            isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar)
            isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar)

            if isNextWhiteSpace:  # noqa: SIM114
                canOpen = False
            elif isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar):
                canOpen = False

            if isLastWhiteSpace:  # noqa: SIM114
                canClose = False
            elif isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar):
                canClose = False

            if nextChar == 0x22 and t.group(0) == '"':  # 0x22: "  # noqa: SIM102
                if (
                    lastChar is not None and lastChar >= 0x30 and lastChar <= 0x39
                ):  # 0x30: 0, 0x39: 9
                    # special case: 1"" - count first quote as an inch
                    canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(
                        token.content, t.start(0) + lastIndex, APOSTROPHE
                    )
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                for j in range(len(stack))[::-1]:
                    item = stack[j]
                    if stack[j]["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and stack[j]["level"] == thisLevel:
                        item = stack[j]

                        if isSingle:
                            openQuote = state.md.options.quotes[2]
                            closeQuote = state.md.options.quotes[3]
                        else:
                            openQuote = state.md.options.quotes[0]
                            closeQuote = state.md.options.quotes[1]

                        # replace token.content *before* tokens[item.token].content,
                        # because, if they are pointing at the same token, replaceAt
                        # could mess up indices when quote length != 1
                        token.content = replaceAt(
                            token.content, t.start(0) + lastIndex, closeQuote
                        )
                        tokens[item["token"]].content = replaceAt(
                            tokens[item["token"]].content, item["pos"], openQuote
                        )

                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1

                        text = token.content
                        maximum = len(text)

                        stack = stack[:j]
                        goto_outer = True
                        break
                if goto_outer:
                    goto_outer = False
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": t.start(0) + lastIndex,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                token.content = replaceAt(
                    token.content, t.start(0) + lastIndex, APOSTROPHE
                )


def smartquotes(state: StateCore) -> None:
    if not state.md.options.typographer:
        return

    for token in state.tokens:
        if token.type != "inline" or not QUOTE_RE.search(token.content):
            continue
        if token.children is not None:
            process_inlines(token.children, state)
@@ -0,0 +1,25 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType

if TYPE_CHECKING:
    from markdown_it import MarkdownIt


class StateCore(StateBase):
    def __init__(
        self,
        src: str,
        md: MarkdownIt,
        env: EnvType,
        tokens: list[Token] | None = None,
    ) -> None:
        self.src = src
        self.md = md  # link to parser instance
        self.env = env
        self.tokens: list[Token] = tokens or []
        self.inlineMode = False
@@ -0,0 +1,35 @@
"""Join raw text tokens with the rest of the text

This is set as a separate rule to provide an opportunity for plugins
to run text replacements after text join, but before escape join.

For example, `\\:)` shouldn't be replaced with an emoji.
"""

from __future__ import annotations

from ..token import Token
from .state_core import StateCore
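

# For example, the escape rule emits "\*" as a text_special token with
# content "*"; this pass converts it back to a plain text token and merges
# it with its neighbours, so [text("a"), text_special("*"), text("b")]
# collapses to [text("a*b")].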
def text_join(state: StateCore) -> None:
    """Join raw text for escape sequences (`text_special`) tokens with the rest of the text"""

    for inline_token in state.tokens[:]:
        if inline_token.type != "inline":
            continue

        # convert text_special to text and join all adjacent text nodes
        new_tokens: list[Token] = []
        for child_token in inline_token.children or []:
            if child_token.type == "text_special":
                child_token.type = "text"
            if (
                child_token.type == "text"
                and new_tokens
                and new_tokens[-1].type == "text"
            ):
                new_tokens[-1].content += child_token.content
            else:
                new_tokens.append(child_token)
        inline_token.children = new_tokens
@@ -0,0 +1,31 @@
__all__ = (
    "StateInline",
    "autolink",
    "backtick",
    "emphasis",
    "entity",
    "escape",
    "fragments_join",
    "html_inline",
    "image",
    "link",
    "link_pairs",
    "linkify",
    "newline",
    "strikethrough",
    "text",
)
from . import emphasis, strikethrough
from .autolink import autolink
from .backticks import backtick
from .balance_pairs import link_pairs
from .entity import entity
from .escape import escape
from .fragments_join import fragments_join
from .html_inline import html_inline
from .image import image
from .link import link
from .linkify import linkify
from .newline import newline
from .state_inline import StateInline
from .text import text
@@ -0,0 +1,77 @@
# Process autolinks '<protocol:...>'
import re

from .state_inline import StateInline

EMAIL_RE = re.compile(
    r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"
)
AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
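

# For example, "<https://example.com>" produces
# link_open / text("https://example.com") / link_close tokens, and
# "<user@example.com>" is matched by EMAIL_RE and given a "mailto:" href.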
def autolink(state: StateInline, silent: bool) -> bool:
    pos = state.pos

    if state.src[pos] != "<":
        return False

    start = state.pos
    maximum = state.posMax

    while True:
        pos += 1
        if pos >= maximum:
            return False

        ch = state.src[pos]

        if ch == "<":
            return False
        if ch == ">":
            break

    url = state.src[start + 1 : pos]

    if AUTOLINK_RE.search(url) is not None:
        fullUrl = state.md.normalizeLink(url)
        if not state.md.validateLink(fullUrl):
            return False

        if not silent:
            token = state.push("link_open", "a", 1)
            token.attrs = {"href": fullUrl}
            token.markup = "autolink"
            token.info = "auto"

            token = state.push("text", "", 0)
            token.content = state.md.normalizeLinkText(url)

            token = state.push("link_close", "a", -1)
            token.markup = "autolink"
            token.info = "auto"

        state.pos += len(url) + 2
        return True

    if EMAIL_RE.search(url) is not None:
        fullUrl = state.md.normalizeLink("mailto:" + url)
        if not state.md.validateLink(fullUrl):
            return False

        if not silent:
            token = state.push("link_open", "a", 1)
            token.attrs = {"href": fullUrl}
            token.markup = "autolink"
            token.info = "auto"

            token = state.push("text", "", 0)
            token.content = state.md.normalizeLinkText(url)

            token = state.push("link_close", "a", -1)
            token.markup = "autolink"
            token.info = "auto"

        state.pos += len(url) + 2
        return True

    return False
@@ -0,0 +1,72 @@
# Parse backticks
import re

from .state_inline import StateInline

regex = re.compile("^ (.+) $")
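

# For example, "`code`" becomes a code_inline token. One leading and one
# trailing space are stripped when both are present and the content is not
# all spaces, so "` code `" yields "code" but "`  `" keeps its spaces.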
def backtick(state: StateInline, silent: bool) -> bool:
    pos = state.pos

    if state.src[pos] != "`":
        return False

    start = pos
    pos += 1
    maximum = state.posMax

    # scan marker length
    while pos < maximum and (state.src[pos] == "`"):
        pos += 1

    marker = state.src[start:pos]
    openerLength = len(marker)

    if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
        if not silent:
            state.pending += marker
        state.pos += openerLength
        return True

    matchStart = matchEnd = pos

    # Nothing found in the cache, scan until the end of the line (or until marker is found)
    while True:
        try:
            matchStart = state.src.index("`", matchEnd)
        except ValueError:
            break
        matchEnd = matchStart + 1

        # scan marker length
        while matchEnd < maximum and (state.src[matchEnd] == "`"):
            matchEnd += 1

        closerLength = matchEnd - matchStart

        if closerLength == openerLength:
            # Found matching closer length.
            if not silent:
                token = state.push("code_inline", "code", 0)
                token.markup = marker
                token.content = state.src[pos:matchStart].replace("\n", " ")
                if (
                    token.content.startswith(" ")
                    and token.content.endswith(" ")
                    and len(token.content.strip()) > 0
                ):
                    token.content = token.content[1:-1]
            state.pos = matchEnd
            return True

        # Some different length found, put it in cache as upper limit of where closer can be found
        state.backticks[closerLength] = matchStart

    # Scanned through the end, didn't find anything
    state.backticksScanned = True

    if not silent:
        state.pending += marker
    state.pos += openerLength
    return True
@@ -0,0 +1,138 @@
"""Balance paired characters (*, _, etc) in inline tokens."""

from __future__ import annotations

from .state_inline import Delimiter, StateInline


def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
    """For each opening emphasis-like marker find a matching closing one."""
    if not delimiters:
        return

    openersBottom = {}
    maximum = len(delimiters)

    # headerIdx is the first delimiter of the current (where closer is) delimiter run
    headerIdx = 0
    lastTokenIdx = -2  # needs any value lower than -1
    jumps: list[int] = []
    closerIdx = 0
    while closerIdx < maximum:
        closer = delimiters[closerIdx]

        jumps.append(0)

        # markers belong to same delimiter run if:
        #  - they have adjacent tokens
        #  - AND markers are the same
        #
        if (
            delimiters[headerIdx].marker != closer.marker
            or lastTokenIdx != closer.token - 1
        ):
            headerIdx = closerIdx
        lastTokenIdx = closer.token

        # Length is only used for emphasis-specific "rule of 3",
        # if it's not defined (in strikethrough or 3rd party plugins),
        # we can default it to 0 to disable those checks.
        #
        closer.length = closer.length or 0

        if not closer.close:
            closerIdx += 1
            continue

        # Previously calculated lower bounds (previous fails)
        # for each marker, each delimiter length modulo 3,
        # and for whether this closer can be an opener;
        # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
        if closer.marker not in openersBottom:
            openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]

        minOpenerIdx = openersBottom[closer.marker][
            (3 if closer.open else 0) + (closer.length % 3)
        ]

        openerIdx = headerIdx - jumps[headerIdx] - 1

        newMinOpenerIdx = openerIdx

        while openerIdx > minOpenerIdx:
            opener = delimiters[openerIdx]

            if opener.marker != closer.marker:
                openerIdx -= jumps[openerIdx] + 1
                continue

            if opener.open and opener.end < 0:
                isOddMatch = False

                # from spec:
                #
                # If one of the delimiters can both open and close emphasis, then the
                # sum of the lengths of the delimiter runs containing the opening and
                # closing delimiters must not be a multiple of 3 unless both lengths
                # are multiples of 3.
                #
                if (
                    (opener.close or closer.open)
                    and ((opener.length + closer.length) % 3 == 0)
                    and (opener.length % 3 != 0 or closer.length % 3 != 0)
                ):
                    isOddMatch = True

                if not isOddMatch:
                    # If previous delimiter cannot be an opener, we can safely skip
                    # the entire sequence in future checks. This is required to make
                    # sure algorithm has linear complexity (see *_*_*_*_*_... case).
                    #
                    if openerIdx > 0 and not delimiters[openerIdx - 1].open:
                        lastJump = jumps[openerIdx - 1] + 1
                    else:
                        lastJump = 0

                    jumps[closerIdx] = closerIdx - openerIdx + lastJump
                    jumps[openerIdx] = lastJump

                    closer.open = False
                    opener.end = closerIdx
                    opener.close = False
                    newMinOpenerIdx = -1

                    # treat next token as start of run,
                    # it optimizes skips in **<...>**a**<...>** pathological case
                    lastTokenIdx = -2

                    break

            openerIdx -= jumps[openerIdx] + 1

        if newMinOpenerIdx != -1:
            # If match for this delimiter run failed, we want to set lower bound for
            # future lookups. This is required to make sure algorithm has linear
            # complexity.
            #
            # See details here:
            # https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
            #
            openersBottom[closer.marker][
                (3 if closer.open else 0) + ((closer.length or 0) % 3)
            ] = newMinOpenerIdx

        closerIdx += 1
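

# Balance the top-level delimiter list first, then each nested list
# collected in tokens_meta (e.g. delimiters found inside link contents).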
def link_pairs(state: StateInline) -> None:
    tokens_meta = state.tokens_meta
    maximum = len(state.tokens_meta)

    processDelimiters(state, state.delimiters)

    curr = 0
    while curr < maximum:
        curr_meta = tokens_meta[curr]
        if curr_meta and "delimiters" in curr_meta:
            processDelimiters(state, curr_meta["delimiters"])
        curr += 1
@@ -0,0 +1,102 @@
# Process *this* and _that_
#
from __future__ import annotations

from .state_inline import Delimiter, StateInline


def tokenize(state: StateInline, silent: bool) -> bool:
    """Insert each marker as a separate text token, and add it to delimiter list"""
    start = state.pos
    marker = state.src[start]

    if silent:
        return False

    if marker not in ("_", "*"):
        return False

    scanned = state.scanDelims(state.pos, marker == "*")

    for _ in range(scanned.length):
        token = state.push("text", "", 0)
        token.content = marker
        state.delimiters.append(
            Delimiter(
                marker=ord(marker),
                length=scanned.length,
                token=len(state.tokens) - 1,
                end=-1,
                open=scanned.can_open,
                close=scanned.can_close,
            )
        )

    state.pos += scanned.length

    return True


def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
    i = len(delimiters) - 1
    while i >= 0:
        startDelim = delimiters[i]

        # /* _ */ /* * */
        if startDelim.marker != 0x5F and startDelim.marker != 0x2A:
            i -= 1
            continue

        # Process only opening markers
        if startDelim.end == -1:
            i -= 1
            continue

        endDelim = delimiters[startDelim.end]

        # If the previous delimiter has the same marker and is adjacent to this one,
        # merge those into one strong delimiter.
        #
        # `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
        #
        isStrong = (
            i > 0
            and delimiters[i - 1].end == startDelim.end + 1
            # check that first two markers match and adjacent
            and delimiters[i - 1].marker == startDelim.marker
            and delimiters[i - 1].token == startDelim.token - 1
            # check that last two markers are adjacent (we can safely assume they match)
            and delimiters[startDelim.end + 1].token == endDelim.token + 1
        )

        ch = chr(startDelim.marker)

        token = state.tokens[startDelim.token]
        token.type = "strong_open" if isStrong else "em_open"
        token.tag = "strong" if isStrong else "em"
        token.nesting = 1
        token.markup = ch + ch if isStrong else ch
        token.content = ""

        token = state.tokens[endDelim.token]
        token.type = "strong_close" if isStrong else "em_close"
        token.tag = "strong" if isStrong else "em"
        token.nesting = -1
        token.markup = ch + ch if isStrong else ch
        token.content = ""

        if isStrong:
            state.tokens[delimiters[i - 1].token].content = ""
            state.tokens[delimiters[startDelim.end + 1].token].content = ""
            i -= 1

        i -= 1
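

# For example, "**bold**" tokenizes as four "*" text tokens; _postProcess
# rewrites the outer pair to strong_open/strong_close and blanks the inner
# pair, while "*em*" becomes em_open / em_close.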
def postProcess(state: StateInline) -> None:
    """Walk through delimiter list and replace text tokens with tags."""
    _postProcess(state, state.delimiters)

    for token in state.tokens_meta:
        if token and "delimiters" in token:
            _postProcess(state, token["delimiters"])
@@ -0,0 +1,53 @@
# Process html entity - &#123;, &#xAF;, &quot;, ...
import re

from ..common.entities import entities
from ..common.utils import fromCodePoint, isValidEntityCode
from .state_inline import StateInline

DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)
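

# For example, "&amp;" and "&#x26;" each yield a text_special token with
# content "&"; numeric codes that are not valid entity code points fall
# back to U+FFFD.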
def entity(state: StateInline, silent: bool) -> bool:
    pos = state.pos
    maximum = state.posMax

    if state.src[pos] != "&":
        return False

    if pos + 1 >= maximum:
        return False

    if state.src[pos + 1] == "#":
        if match := DIGITAL_RE.search(state.src[pos:]):
            if not silent:
                match1 = match.group(1)
                code = (
                    int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10)
                )

                token = state.push("text_special", "", 0)
                token.content = (
                    fromCodePoint(code)
                    if isValidEntityCode(code)
                    else fromCodePoint(0xFFFD)
                )
                token.markup = match.group(0)
                token.info = "entity"

            state.pos += len(match.group(0))
            return True

    else:
        if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities:
            if not silent:
                token = state.push("text_special", "", 0)
                token.content = entities[match.group(1)]
                token.markup = match.group(0)
                token.info = "entity"

            state.pos += len(match.group(0))
            return True

    return False
@@ -0,0 +1,93 @@
"""
Process escaped chars and hardbreaks
"""

from ..common.utils import isStrSpace
from .state_inline import StateInline
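

# For example, "\*" yields a text_special token with content "*" (the star
# is taken literally instead of opening emphasis), and a backslash at the
# end of a line produces a hardbreak token.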
|
||||
|
||||
|
||||
def escape(state: StateInline, silent: bool) -> bool:
|
||||
"""Process escaped chars and hardbreaks."""
|
||||
pos = state.pos
|
||||
maximum = state.posMax
|
||||
|
||||
if state.src[pos] != "\\":
|
||||
return False
|
||||
|
||||
pos += 1
|
||||
|
||||
# '\' at the end of the inline block
|
||||
if pos >= maximum:
|
||||
return False
|
||||
|
||||
ch1 = state.src[pos]
|
||||
ch1_ord = ord(ch1)
|
||||
if ch1 == "\n":
|
||||
if not silent:
|
||||
state.push("hardbreak", "br", 0)
|
||||
pos += 1
|
||||
# skip leading whitespaces from next line
|
||||
while pos < maximum:
|
||||
ch = state.src[pos]
|
||||
if not isStrSpace(ch):
|
||||
break
|
||||
pos += 1
|
||||
|
||||
state.pos = pos
|
||||
return True
|
||||
|
||||
escapedStr = state.src[pos]
|
||||
|
||||
if ch1_ord >= 0xD800 and ch1_ord <= 0xDBFF and pos + 1 < maximum:
|
||||
ch2 = state.src[pos + 1]
|
||||
ch2_ord = ord(ch2)
|
||||
if ch2_ord >= 0xDC00 and ch2_ord <= 0xDFFF:
|
||||
escapedStr += ch2
|
||||
pos += 1
|
||||
|
||||
origStr = "\\" + escapedStr
|
||||
|
||||
if not silent:
|
||||
token = state.push("text_special", "", 0)
|
||||
token.content = escapedStr if ch1 in _ESCAPED else origStr
|
||||
token.markup = origStr
|
||||
token.info = "escape"
|
||||
|
||||
state.pos = pos + 1
|
||||
return True
|
||||
|
||||
|
||||
_ESCAPED = {
|
||||
"!",
|
||||
'"',
|
||||
"#",
|
||||
"$",
|
||||
"%",
|
||||
"&",
|
||||
"'",
|
||||
"(",
|
||||
")",
|
||||
"*",
|
||||
"+",
|
||||
",",
|
||||
"-",
|
||||
".",
|
||||
"/",
|
||||
":",
|
||||
";",
|
||||
"<",
|
||||
"=",
|
||||
">",
|
||||
"?",
|
||||
"@",
|
||||
"[",
|
||||
"\\",
|
||||
"]",
|
||||
"^",
|
||||
"_",
|
||||
"`",
|
||||
"{",
|
||||
"|",
|
||||
"}",
|
||||
"~",
|
||||
}
|
||||
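A minimal sketch (not part of the committed file): a backslash before a character in _ESCAPED drops the backslash and suppresses any markup meaning.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render(r"\*not emphasis\*"))  # -> <p>*not emphasis*</p>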
@@ -0,0 +1,43 @@
from .state_inline import StateInline


def fragments_join(state: StateInline) -> None:
    """
    Clean up tokens after emphasis and strikethrough postprocessing:
    merge adjacent text nodes into one and re-calculate all token levels

    This is necessary because initially emphasis delimiter markers (``*, _, ~``)
    are treated as their own separate text tokens. Then emphasis rule either
    leaves them as text (needed to merge with adjacent text) or turns them
    into opening/closing tags (which messes up levels inside).
    """
    level = 0
    maximum = len(state.tokens)

    curr = last = 0
    while curr < maximum:
        # re-calculate levels after emphasis/strikethrough turns some text nodes
        # into opening/closing tags
        if state.tokens[curr].nesting < 0:
            level -= 1  # closing tag
        state.tokens[curr].level = level
        if state.tokens[curr].nesting > 0:
            level += 1  # opening tag

        if (
            state.tokens[curr].type == "text"
            and curr + 1 < maximum
            and state.tokens[curr + 1].type == "text"
        ):
            # collapse two adjacent text nodes
            state.tokens[curr + 1].content = (
                state.tokens[curr].content + state.tokens[curr + 1].content
            )
        else:
            if curr != last:
                state.tokens[last] = state.tokens[curr]
            last += 1
        curr += 1

    if curr != last:
        del state.tokens[last:]
@@ -0,0 +1,43 @@
# Process html tags
from ..common.html_re import HTML_TAG_RE
from ..common.utils import isLinkClose, isLinkOpen
from .state_inline import StateInline


def isLetter(ch: int) -> bool:
    lc = ch | 0x20  # to lower case
    # /* a */ and /* z */
    return (lc >= 0x61) and (lc <= 0x7A)


def html_inline(state: StateInline, silent: bool) -> bool:
    pos = state.pos

    if not state.md.options.get("html", None):
        return False

    # Check start
    maximum = state.posMax
    if state.src[pos] != "<" or pos + 2 >= maximum:
        return False

    # Quick fail on second char
    ch = state.src[pos + 1]
    if ch not in ("!", "?", "/") and not isLetter(ord(ch)):  # /* / */
        return False

    match = HTML_TAG_RE.search(state.src[pos:])
    if not match:
        return False

    if not silent:
        token = state.push("html_inline", "", 0)
        token.content = state.src[pos : pos + len(match.group(0))]

        if isLinkOpen(token.content):
            state.linkLevel += 1
        if isLinkClose(token.content):
            state.linkLevel -= 1

    state.pos += len(match.group(0))
    return True
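A hedged sketch (not part of the committed file): with the "html" option enabled, as it is in the default commonmark preset, spans matching HTML_TAG_RE pass through as html_inline tokens.

from markdown_it import MarkdownIt

md = MarkdownIt()  # commonmark preset: options["html"] is True
print(md.render("a <span>b</span> c"))  # -> <p>a <span>b</span> c</p>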
@@ -0,0 +1,148 @@
# Process ![image](<src> "title")
from __future__ import annotations

from ..common.utils import isStrSpace, normalizeReference
from ..token import Token
from .state_inline import StateInline


def image(state: StateInline, silent: bool) -> bool:
    label = None
    href = ""
    oldPos = state.pos
    max = state.posMax

    if state.src[state.pos] != "!":
        return False

    if state.pos + 1 < state.posMax and state.src[state.pos + 1] != "[":
        return False

    labelStart = state.pos + 2
    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos + 1, False)

    # parser failed to find ']', so it's not a valid link
    if labelEnd < 0:
        return False

    pos = labelEnd + 1

    if pos < max and state.src[pos] == "(":
        #
        # Inline link
        #

        # [link](  <href>  "title"  )
        #        ^^ skipping these spaces
        pos += 1
        while pos < max:
            ch = state.src[pos]
            if not isStrSpace(ch) and ch != "\n":
                break
            pos += 1

        if pos >= max:
            return False

        # [link](  <href>  "title"  )
        #          ^^^^^^ parsing link destination
        start = pos
        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
        if res.ok:
            href = state.md.normalizeLink(res.str)
            if state.md.validateLink(href):
                pos = res.pos
            else:
                href = ""

        # [link](  <href>  "title"  )
        #                ^^ skipping these spaces
        start = pos
        while pos < max:
            ch = state.src[pos]
            if not isStrSpace(ch) and ch != "\n":
                break
            pos += 1

        # [link](  <href>  "title"  )
        #                  ^^^^^^^ parsing link title
        res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax, None)
        if pos < max and start != pos and res.ok:
            title = res.str
            pos = res.pos

            # [link](  <href>  "title"  )
            #                         ^^ skipping these spaces
            while pos < max:
                ch = state.src[pos]
                if not isStrSpace(ch) and ch != "\n":
                    break
                pos += 1
        else:
            title = ""

        if pos >= max or state.src[pos] != ")":
            state.pos = oldPos
            return False

        pos += 1

    else:
        #
        # Link reference
        #
        if "references" not in state.env:
            return False

        # /* [ */
        if pos < max and state.src[pos] == "[":
            start = pos + 1
            pos = state.md.helpers.parseLinkLabel(state, pos)
            if pos >= 0:
                label = state.src[start:pos]
                pos += 1
            else:
                pos = labelEnd + 1
        else:
            pos = labelEnd + 1

        # covers label == '' and label == undefined
        # (collapsed reference link and shortcut reference link respectively)
        if not label:
            label = state.src[labelStart:labelEnd]

        label = normalizeReference(label)

        ref = state.env["references"].get(label, None)
        if not ref:
            state.pos = oldPos
            return False

        href = ref["href"]
        title = ref["title"]

    #
    # We found the end of the link, and know for a fact it's a valid link
    # so all that's left to do is to call tokenizer.
    #
    if not silent:
        content = state.src[labelStart:labelEnd]

        tokens: list[Token] = []
        state.md.inline.parse(content, state.md, state.env, tokens)

        token = state.push("image", "img", 0)
        token.attrs = {"src": href, "alt": ""}
        token.children = tokens or None
        token.content = content

        if title:
            token.attrSet("title", title)

        # note, this is not part of markdown-it JS, but is useful for renderers
        if label and state.md.options.get("store_labels", False):
            token.meta["label"] = label

    state.pos = pos
    state.posMax = max
    return True
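A hedged sketch (not part of the committed file): the rule emits one "image" token whose alt text is re-parsed into child tokens; the renderer later derives the alt attribute from those children.

from markdown_it import MarkdownIt

md = MarkdownIt()
img = md.parse('![alt](pic.png "The title")')[1].children[0]
print(img.type, img.attrs)  # image {'src': 'pic.png', 'alt': '', 'title': 'The title'}
print(md.render("![alt](pic.png)"))  # -> <p><img src="pic.png" alt="alt" /></p>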
@@ -0,0 +1,149 @@
# Process [link](<to> "stuff")

from ..common.utils import isStrSpace, normalizeReference
from .state_inline import StateInline


def link(state: StateInline, silent: bool) -> bool:
    href = ""
    title = ""
    label = None
    oldPos = state.pos
    maximum = state.posMax
    start = state.pos
    parseReference = True

    if state.src[state.pos] != "[":
        return False

    labelStart = state.pos + 1
    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, True)

    # parser failed to find ']', so it's not a valid link
    if labelEnd < 0:
        return False

    pos = labelEnd + 1

    if pos < maximum and state.src[pos] == "(":
        #
        # Inline link
        #

        # might have found a valid shortcut link, disable reference parsing
        parseReference = False

        # [link](  <href>  "title"  )
        #        ^^ skipping these spaces
        pos += 1
        while pos < maximum:
            ch = state.src[pos]
            if not isStrSpace(ch) and ch != "\n":
                break
            pos += 1

        if pos >= maximum:
            return False

        # [link](  <href>  "title"  )
        #          ^^^^^^ parsing link destination
        start = pos
        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
        if res.ok:
            href = state.md.normalizeLink(res.str)
            if state.md.validateLink(href):
                pos = res.pos
            else:
                href = ""

            # [link](  <href>  "title"  )
            #                ^^ skipping these spaces
            start = pos
            while pos < maximum:
                ch = state.src[pos]
                if not isStrSpace(ch) and ch != "\n":
                    break
                pos += 1

            # [link](  <href>  "title"  )
            #                  ^^^^^^^ parsing link title
            res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
            if pos < maximum and start != pos and res.ok:
                title = res.str
                pos = res.pos

                # [link](  <href>  "title"  )
                #                         ^^ skipping these spaces
                while pos < maximum:
                    ch = state.src[pos]
                    if not isStrSpace(ch) and ch != "\n":
                        break
                    pos += 1

        if pos >= maximum or state.src[pos] != ")":
            # parsing a valid shortcut link failed, fallback to reference
            parseReference = True

        pos += 1

    if parseReference:
        #
        # Link reference
        #
        if "references" not in state.env:
            return False

        if pos < maximum and state.src[pos] == "[":
            start = pos + 1
            pos = state.md.helpers.parseLinkLabel(state, pos)
            if pos >= 0:
                label = state.src[start:pos]
                pos += 1
            else:
                pos = labelEnd + 1

        else:
            pos = labelEnd + 1

        # covers label == '' and label == undefined
        # (collapsed reference link and shortcut reference link respectively)
        if not label:
            label = state.src[labelStart:labelEnd]

        label = normalizeReference(label)

        ref = state.env["references"].get(label, None)
        if not ref:
            state.pos = oldPos
            return False

        href = ref["href"]
        title = ref["title"]

    #
    # We found the end of the link, and know for a fact it's a valid link
    # so all that's left to do is to call tokenizer.
    #
    if not silent:
        state.pos = labelStart
        state.posMax = labelEnd

        token = state.push("link_open", "a", 1)
        token.attrs = {"href": href}

        if title:
            token.attrSet("title", title)

        # note, this is not part of markdown-it JS, but is useful for renderers
        if label and state.md.options.get("store_labels", False):
            token.meta["label"] = label

        state.linkLevel += 1
        state.md.inline.tokenize(state)
        state.linkLevel -= 1

        token = state.push("link_close", "a", -1)

    state.pos = pos
    state.posMax = maximum
    return True
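A minimal sketch (not part of the committed file): both branches above, inline and reference, end in the same link_open/link_close pair around the tokenized label.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render('[text](https://example.org "title")'))
# -> <p><a href="https://example.org" title="title">text</a></p>
print(md.render("[ref]\n\n[ref]: https://example.org"))
# -> <p><a href="https://example.org">ref</a></p>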
@@ -0,0 +1,62 @@
"""Process links like https://example.org/"""

import re

from .state_inline import StateInline

# RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE)


def linkify(state: StateInline, silent: bool) -> bool:
    """Rule for identifying plain-text links."""
    if not state.md.options.linkify:
        return False
    if state.linkLevel > 0:
        return False
    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")

    pos = state.pos
    maximum = state.posMax

    if (
        (pos + 3) > maximum
        or state.src[pos] != ":"
        or state.src[pos + 1] != "/"
        or state.src[pos + 2] != "/"
    ):
        return False

    if not (match := SCHEME_RE.search(state.pending)):
        return False

    proto = match.group(1)
    if not (link := state.md.linkify.match_at_start(state.src[pos - len(proto) :])):
        return False
    url: str = link.url

    # disallow '*' at the end of the link (conflicts with emphasis)
    url = url.rstrip("*")

    full_url = state.md.normalizeLink(url)
    if not state.md.validateLink(full_url):
        return False

    if not silent:
        state.pending = state.pending[: -len(proto)]

        token = state.push("link_open", "a", 1)
        token.attrs = {"href": full_url}
        token.markup = "linkify"
        token.info = "auto"

        token = state.push("text", "", 0)
        token.content = state.md.normalizeLinkText(url)

        token = state.push("link_close", "a", -1)
        token.markup = "linkify"
        token.info = "auto"

    state.pos += len(url) - len(proto)
    return True
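A hedged sketch (not part of the committed file): linkify needs the option set and the rule enabled, and depends on the optional linkify-it-py package; without it the rule raises ModuleNotFoundError, as above.

from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"linkify": True}).enable("linkify")
print(md.render("visit https://example.org today"))
# roughly: <p>visit <a href="https://example.org">https://example.org</a> today</p>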
@@ -0,0 +1,44 @@
"""Process '\n'."""

from ..common.utils import charStrAt, isStrSpace
from .state_inline import StateInline


def newline(state: StateInline, silent: bool) -> bool:
    pos = state.pos

    if state.src[pos] != "\n":
        return False

    pmax = len(state.pending) - 1
    maximum = state.posMax

    # '  \n' -> hardbreak
    # Lookup in pending chars is bad practice! Don't copy to other rules!
    # Pending string is stored in concat mode, indexed lookups will cause
    # conversion to flat mode.
    if not silent:
        if pmax >= 0 and charStrAt(state.pending, pmax) == " ":
            if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ":
                # Find whitespaces tail of pending chars.
                ws = pmax - 1
                while ws >= 1 and charStrAt(state.pending, ws - 1) == " ":
                    ws -= 1
                state.pending = state.pending[:ws]

                state.push("hardbreak", "br", 0)
            else:
                state.pending = state.pending[:-1]
                state.push("softbreak", "br", 0)

        else:
            state.push("softbreak", "br", 0)

    pos += 1

    # skip heading spaces for next line
    while pos < maximum and isStrSpace(state.src[pos]):
        pos += 1

    state.pos = pos
    return True
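A minimal sketch (not part of the committed file): two trailing spaces before the newline yield a hardbreak, a bare newline a softbreak.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("hard  \nbreak"))  # -> <p>hard<br />\nbreak</p>
print(md.render("soft\nbreak"))   # -> <p>soft\nbreak</p> (breaks option is False)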
@@ -0,0 +1,165 @@
from __future__ import annotations

from collections import namedtuple
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal

from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType

if TYPE_CHECKING:
    from markdown_it import MarkdownIt


@dataclass(slots=True)
class Delimiter:
    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    level: bool | None = None


Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])


class StateInline(StateBase):
    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens)

        self.pos = 0
        self.posMax = len(self.src)
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Stores { start: end } pairs. Useful for backtrack
        # optimization of pairs parse (emphasis, strikes).
        self.cache: dict[int, int] = {}

        # List of emphasis-like delimiters for current tag
        self.delimiters: list[Delimiter] = []

        # Stack of delimiter lists for upper level tags
        self._prev_delimiters: list[list[Delimiter]] = []

        # backticklength => last seen position
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # Counter used to disable inline linkify-it execution
        # inside <a> and markdown links
        self.linkLevel = 0

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}"
            f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
        )

    def pushPending(self) -> Token:
        token = Token("text", "", 0)
        token.content = self.pending
        token.level = self.pendingLevel
        self.tokens.append(token)
        self.pending = ""
        return token

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Push new token to "stream".
        If pending text exists - flush it as text token
        """
        if self.pending:
            self.pushPending()

        token = Token(ttype, tag, nesting)
        token_meta = None

        if nesting < 0:
            # closing tag
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        token.level = self.level

        if nesting > 0:
            # opening tag
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            token_meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(token)
        self.tokens_meta.append(token_meta)
        return token

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """
        Scan a sequence of emphasis-like markers, and determine whether
        it can start an emphasis sequence or end an emphasis sequence.

        - start - position to scan from (it should point at a valid marker);
        - canSplitWord - determine if these markers can be found inside a word

        """
        pos = start
        maximum = self.posMax
        marker = self.src[start]

        # treat beginning of the line as a whitespace
        lastChar = self.src[start - 1] if start > 0 else " "

        while pos < maximum and self.src[pos] == marker:
            pos += 1

        count = pos - start

        # treat end of the line as a whitespace
        nextChar = self.src[pos] if pos < maximum else " "

        isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar)
        isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar)

        isLastWhiteSpace = isWhiteSpace(ord(lastChar))
        isNextWhiteSpace = isWhiteSpace(ord(nextChar))

        left_flanking = not (
            isNextWhiteSpace
            or (isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar))
        )
        right_flanking = not (
            isLastWhiteSpace
            or (isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar))
        )

        can_open = left_flanking and (
            canSplitWord or (not right_flanking) or isLastPunctChar
        )
        can_close = right_flanking and (
            canSplitWord or (not left_flanking) or isNextPunctChar
        )

        return Scanned(can_open, can_close, count)
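A hedged sketch (not part of the committed file) of scanDelims, constructing a StateInline directly with the signature shown above; the flanking rules make the leading marker run an opener and the trailing run a closer.

from markdown_it import MarkdownIt
from markdown_it.rules_inline.state_inline import StateInline

state = StateInline("**word**", MarkdownIt(), {}, [])
print(state.scanDelims(0, True))  # Scanned(can_open=True, can_close=False, length=2)
print(state.scanDelims(6, True))  # Scanned(can_open=False, can_close=True, length=2)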
@@ -0,0 +1,127 @@
# ~~strike through~~
from __future__ import annotations

from .state_inline import Delimiter, StateInline


def tokenize(state: StateInline, silent: bool) -> bool:
    """Insert each marker as a separate text token, and add it to delimiter list"""
    start = state.pos
    ch = state.src[start]

    if silent:
        return False

    if ch != "~":
        return False

    scanned = state.scanDelims(state.pos, True)
    length = scanned.length

    if length < 2:
        return False

    if length % 2:
        token = state.push("text", "", 0)
        token.content = ch
        length -= 1

    i = 0
    while i < length:
        token = state.push("text", "", 0)
        token.content = ch + ch
        state.delimiters.append(
            Delimiter(
                marker=ord(ch),
                length=0,  # disable "rule of 3" length checks meant for emphasis
                token=len(state.tokens) - 1,
                end=-1,
                open=scanned.can_open,
                close=scanned.can_close,
            )
        )

        i += 2

    state.pos += scanned.length

    return True


def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
    loneMarkers = []
    maximum = len(delimiters)

    i = 0
    while i < maximum:
        startDelim = delimiters[i]

        if startDelim.marker != 0x7E:  # /* ~ */
            i += 1
            continue

        if startDelim.end == -1:
            i += 1
            continue

        endDelim = delimiters[startDelim.end]

        token = state.tokens[startDelim.token]
        token.type = "s_open"
        token.tag = "s"
        token.nesting = 1
        token.markup = "~~"
        token.content = ""

        token = state.tokens[endDelim.token]
        token.type = "s_close"
        token.tag = "s"
        token.nesting = -1
        token.markup = "~~"
        token.content = ""

        if (
            state.tokens[endDelim.token - 1].type == "text"
            and state.tokens[endDelim.token - 1].content == "~"
        ):
            loneMarkers.append(endDelim.token - 1)

        i += 1

    # If a marker sequence has an odd number of characters, it's split
    # like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
    # start of the sequence.
    #
    # So, we have to move all those markers after subsequent s_close tags.
    #
    while loneMarkers:
        i = loneMarkers.pop()
        j = i + 1

        while (j < len(state.tokens)) and (state.tokens[j].type == "s_close"):
            j += 1

        j -= 1

        if i != j:
            token = state.tokens[j]
            state.tokens[j] = state.tokens[i]
            state.tokens[i] = token


def postProcess(state: StateInline) -> None:
    """Walk through delimiter list and replace text tokens with tags."""
    tokens_meta = state.tokens_meta
    maximum = len(state.tokens_meta)
    _postProcess(state, state.delimiters)

    curr = 0
    while curr < maximum:
        try:
            curr_meta = tokens_meta[curr]
        except IndexError:
            pass
        else:
            if curr_meta and "delimiters" in curr_meta:
                _postProcess(state, curr_meta["delimiters"])
        curr += 1
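A hedged sketch (not part of the committed file): strikethrough is disabled in the commonmark preset, so the rule has to be switched on before ~~ pairs become the s_open/s_close tokens rewritten above.

from markdown_it import MarkdownIt

md = MarkdownIt().enable("strikethrough")
print(md.render("~~struck~~"))  # -> <p><s>struck</s></p>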
@@ -0,0 +1,62 @@
import functools
import re

# Skip text characters for text token, place those to pending buffer
# and increment current pos
from .state_inline import StateInline

# Rule to skip pure text
# '{}$%@~+=:' reserved for extensions

# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
# http://spec.commonmark.org/0.15/#ascii-punctuation-character


_TerminatorChars = {
    "\n",
    "!",
    "#",
    "$",
    "%",
    "&",
    "*",
    "+",
    "-",
    ":",
    "<",
    "=",
    ">",
    "@",
    "[",
    "\\",
    "]",
    "^",
    "_",
    "`",
    "{",
    "}",
    "~",
}


@functools.cache
def _terminator_char_regex() -> re.Pattern[str]:
    return re.compile("[" + re.escape("".join(_TerminatorChars)) + "]")


def text(state: StateInline, silent: bool) -> bool:
    pos = state.pos
    posMax = state.posMax

    terminator_char = _terminator_char_regex().search(state.src, pos)
    pos = terminator_char.start() if terminator_char else posMax

    if pos == state.pos:
        return False

    if not silent:
        state.pending += state.src[state.pos : pos]

    state.pos = pos

    return True
@@ -0,0 +1,178 @@
from __future__ import annotations

from collections.abc import Callable, MutableMapping
import dataclasses as dc
from typing import Any, Literal
import warnings


def convert_attrs(value: Any) -> Any:
    """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict.

    This improves compatibility with upstream markdown-it.
    """
    if not value:
        return {}
    if isinstance(value, list):
        return dict(value)
    return value


@dc.dataclass(slots=True)
class Token:
    type: str
    """Type of the token (string, e.g. "paragraph_open")"""

    tag: str
    """HTML tag name, e.g. 'p'"""

    nesting: Literal[-1, 0, 1]
    """Level change (number in {-1, 0, 1} set), where:

    - `1` means the tag is opening
    - `0` means the tag is self-closing
    - `-1` means the tag is closing
    """

    attrs: dict[str, str | int | float] = dc.field(default_factory=dict)
    """HTML attributes.

    Note this differs from the upstream "list of lists" format,
    although an instance can still be initialised with this format.
    """

    map: list[int] | None = None
    """Source map info. Format: `[ line_begin, line_end ]`"""

    level: int = 0
    """Nesting level, the same as `state.level`"""

    children: list[Token] | None = None
    """Array of child nodes (inline and img tokens)."""

    content: str = ""
    """Inner content, in the case of a self-closing tag (code, html, fence, etc.)."""

    markup: str = ""
    """'*' or '_' for emphasis, fence string for fence, etc."""

    info: str = ""
    """Additional information:

    - Info string for "fence" tokens
    - The value "auto" for autolink "link_open" and "link_close" tokens
    - The string value of the item marker for ordered-list "list_item_open" tokens
    """

    meta: dict[Any, Any] = dc.field(default_factory=dict)
    """A place for plugins to store any arbitrary data"""

    block: bool = False
    """True for block-level tokens, false for inline tokens.

    Used in renderer to calculate line breaks
    """

    hidden: bool = False
    """If true, ignore this element when rendering.

    Used for tight lists to hide paragraphs.
    """

    def __post_init__(self) -> None:
        self.attrs = convert_attrs(self.attrs)

    def attrIndex(self, name: str) -> int:
        warnings.warn(  # noqa: B028
            "Token.attrIndex should not be used, since Token.attrs is a dictionary",
            UserWarning,
        )
        if name not in self.attrs:
            return -1
        return list(self.attrs.keys()).index(name)

    def attrItems(self) -> list[tuple[str, str | int | float]]:
        """Get (key, value) list of attrs."""
        return list(self.attrs.items())

    def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
        """Add `[ name, value ]` attribute to list. Init attrs if necessary."""
        name, value = attrData
        self.attrSet(name, value)

    def attrSet(self, name: str, value: str | int | float) -> None:
        """Set `name` attribute to `value`. Override old value if exists."""
        self.attrs[name] = value

    def attrGet(self, name: str) -> None | str | int | float:
        """Get the value of attribute `name`, or null if it does not exist."""
        return self.attrs.get(name, None)

    def attrJoin(self, name: str, value: str) -> None:
        """Join value to existing attribute via space.
        Or create new attribute if not exists.
        Useful to operate with token classes.
        """
        if name in self.attrs:
            current = self.attrs[name]
            if not isinstance(current, str):
                raise TypeError(
                    f"existing attr {name!r} is not a str: {self.attrs[name]}"
                )
            self.attrs[name] = f"{current} {value}"
        else:
            self.attrs[name] = value

    def copy(self, **changes: Any) -> Token:
        """Return a shallow copy of the instance."""
        return dc.replace(self, **changes)

    def as_dict(
        self,
        *,
        children: bool = True,
        as_upstream: bool = True,
        meta_serializer: Callable[[dict[Any, Any]], Any] | None = None,
        filter: Callable[[str, Any], bool] | None = None,
        dict_factory: Callable[..., MutableMapping[str, Any]] = dict,
    ) -> MutableMapping[str, Any]:
        """Return the token as a dictionary.

        :param children: Also convert children to dicts
        :param as_upstream: Ensure the output dictionary is equal to that created by markdown-it
            For example, attrs are converted to null or lists
        :param meta_serializer: hook for serializing ``Token.meta``
        :param filter: A callable whose return code determines whether an
            attribute or element is included (``True``) or dropped (``False``).
            Is called with the (key, value) pair.
        :param dict_factory: A callable to produce dictionaries from.
            For example, to produce ordered dictionaries instead of normal Python
            dictionaries, pass in ``collections.OrderedDict``.

        """
        mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self))
        if filter:
            mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v))
        if as_upstream and "attrs" in mapping:
            mapping["attrs"] = (
                None
                if not mapping["attrs"]
                else [[k, v] for k, v in mapping["attrs"].items()]
            )
        if meta_serializer and "meta" in mapping:
            mapping["meta"] = meta_serializer(mapping["meta"])
        if children and mapping.get("children", None):
            mapping["children"] = [
                child.as_dict(
                    children=children,
                    filter=filter,
                    dict_factory=dict_factory,
                    as_upstream=as_upstream,
                    meta_serializer=meta_serializer,
                )
                for child in mapping["children"]
            ]
        return mapping

    @classmethod
    def from_dict(cls, dct: MutableMapping[str, Any]) -> Token:
        """Convert a dict to a Token."""
        token = cls(**dct)
        if token.children:
            token.children = [cls.from_dict(c) for c in token.children]  # type: ignore[arg-type]
        return token
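A minimal sketch (not part of the committed file): Token keeps attrs as a dict, while as_dict/from_dict round-trip the upstream list-of-lists form via convert_attrs.

from markdown_it.token import Token

tok = Token("link_open", "a", 1)
tok.attrSet("href", "https://example.org")
tok.attrJoin("class", "external")
print(tok.attrGet("href"))        # https://example.org
d = tok.as_dict()                 # attrs -> [["href", ...], ["class", ...]]
print(Token.from_dict(d) == tok)  # True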
@@ -0,0 +1,333 @@
"""A tree representation of a linear markdown-it token stream.

This module is not part of upstream JavaScript markdown-it.
"""

from __future__ import annotations

from collections.abc import Generator, Sequence
import textwrap
from typing import Any, NamedTuple, TypeVar, overload

from .token import Token


class _NesterTokens(NamedTuple):
    opening: Token
    closing: Token


_NodeType = TypeVar("_NodeType", bound="SyntaxTreeNode")


class SyntaxTreeNode:
    """A Markdown syntax tree node.

    A class that can be used to construct a tree representation of a linear
    `markdown-it-py` token stream.

    Each node in the tree represents either:
      - root of the Markdown document
      - a single unnested `Token`
      - a `Token` "_open" and "_close" token pair, and the tokens nested in
        between
    """

    def __init__(
        self, tokens: Sequence[Token] = (), *, create_root: bool = True
    ) -> None:
        """Initialize a `SyntaxTreeNode` from a token stream.

        If `create_root` is True, create a root node for the document.
        """
        # Only nodes representing an unnested token have self.token
        self.token: Token | None = None

        # Only containers have nester tokens
        self.nester_tokens: _NesterTokens | None = None

        # Root node does not have self.parent
        self._parent: Any = None

        # Empty list unless a non-empty container, or unnested token that has
        # children (i.e. inline or img)
        self._children: list[Any] = []

        if create_root:
            self._set_children_from_tokens(tokens)
            return

        if not tokens:
            raise ValueError(
                "Can only create root from empty token sequence."
                " Set `create_root=True`."
            )
        elif len(tokens) == 1:
            inline_token = tokens[0]
            if inline_token.nesting:
                raise ValueError(
                    "Unequal nesting level at the start and end of token stream."
                )
            self.token = inline_token
            if inline_token.children:
                self._set_children_from_tokens(inline_token.children)
        else:
            self.nester_tokens = _NesterTokens(tokens[0], tokens[-1])
            self._set_children_from_tokens(tokens[1:-1])

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.type})"

    @overload
    def __getitem__(self: _NodeType, item: int) -> _NodeType: ...

    @overload
    def __getitem__(self: _NodeType, item: slice) -> list[_NodeType]: ...

    def __getitem__(self: _NodeType, item: int | slice) -> _NodeType | list[_NodeType]:
        return self.children[item]

    def to_tokens(self: _NodeType) -> list[Token]:
        """Recover the linear token stream."""

        def recursive_collect_tokens(node: _NodeType, token_list: list[Token]) -> None:
            if node.type == "root":
                for child in node.children:
                    recursive_collect_tokens(child, token_list)
            elif node.token:
                token_list.append(node.token)
            else:
                assert node.nester_tokens
                token_list.append(node.nester_tokens.opening)
                for child in node.children:
                    recursive_collect_tokens(child, token_list)
                token_list.append(node.nester_tokens.closing)

        tokens: list[Token] = []
        recursive_collect_tokens(self, tokens)
        return tokens

    @property
    def children(self: _NodeType) -> list[_NodeType]:
        return self._children

    @children.setter
    def children(self: _NodeType, value: list[_NodeType]) -> None:
        self._children = value

    @property
    def parent(self: _NodeType) -> _NodeType | None:
        return self._parent  # type: ignore

    @parent.setter
    def parent(self: _NodeType, value: _NodeType | None) -> None:
        self._parent = value

    @property
    def is_root(self) -> bool:
        """Is the node a special root node?"""
        return not (self.token or self.nester_tokens)

    @property
    def is_nested(self) -> bool:
        """Is this node nested?

        Returns `True` if the node represents a `Token` pair and tokens in the
        sequence between them, where `Token.nesting` of the first `Token` in
        the pair is 1 and nesting of the other `Token` is -1.
        """
        return bool(self.nester_tokens)

    @property
    def siblings(self: _NodeType) -> Sequence[_NodeType]:
        """Get siblings of the node.

        Gets the whole group of siblings, including self.
        """
        if not self.parent:
            return [self]
        return self.parent.children

    @property
    def type(self) -> str:
        """Get a string type of the represented syntax.

        - "root" for root nodes
        - `Token.type` if the node represents an unnested token
        - `Token.type` of the opening token, with "_open" suffix stripped, if
          the node represents a nester token pair
        """
        if self.is_root:
            return "root"
        if self.token:
            return self.token.type
        assert self.nester_tokens
        return self.nester_tokens.opening.type.removesuffix("_open")

    @property
    def next_sibling(self: _NodeType) -> _NodeType | None:
        """Get the next node in the sequence of siblings.

        Returns `None` if this is the last sibling.
        """
        self_index = self.siblings.index(self)
        if self_index + 1 < len(self.siblings):
            return self.siblings[self_index + 1]
        return None

    @property
    def previous_sibling(self: _NodeType) -> _NodeType | None:
        """Get the previous node in the sequence of siblings.

        Returns `None` if this is the first sibling.
        """
        self_index = self.siblings.index(self)
        if self_index - 1 >= 0:
            return self.siblings[self_index - 1]
        return None

    def _add_child(
        self,
        tokens: Sequence[Token],
    ) -> None:
        """Make a child node for `self`."""
        child = type(self)(tokens, create_root=False)
        child.parent = self
        self.children.append(child)

    def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None:
        """Convert the token stream to a tree structure and set the resulting
        nodes as children of `self`."""
        reversed_tokens = list(reversed(tokens))
        while reversed_tokens:
            token = reversed_tokens.pop()

            if not token.nesting:
                self._add_child([token])
                continue
            if token.nesting != 1:
                raise ValueError("Invalid token nesting")

            nested_tokens = [token]
            nesting = 1
            while reversed_tokens and nesting:
                token = reversed_tokens.pop()
                nested_tokens.append(token)
                nesting += token.nesting
            if nesting:
                raise ValueError(f"unclosed tokens starting {nested_tokens[0]}")

            self._add_child(nested_tokens)

    def pretty(
        self, *, indent: int = 2, show_text: bool = False, _current: int = 0
    ) -> str:
        """Create an XML style string of the tree."""
        prefix = " " * _current
        text = prefix + f"<{self.type}"
        if not self.is_root and self.attrs:
            text += " " + " ".join(f"{k}={v!r}" for k, v in self.attrs.items())
        text += ">"
        if (
            show_text
            and not self.is_root
            and self.type in ("text", "text_special")
            and self.content
        ):
            text += "\n" + textwrap.indent(self.content, prefix + " " * indent)
        for child in self.children:
            text += "\n" + child.pretty(
                indent=indent, show_text=show_text, _current=_current + indent
            )
        return text

    def walk(
        self: _NodeType, *, include_self: bool = True
    ) -> Generator[_NodeType, None, None]:
        """Recursively yield all descendant nodes in the tree starting at self.

        The order mimics the order of the underlying linear token
        stream (i.e. depth first).
        """
        if include_self:
            yield self
        for child in self.children:
            yield from child.walk(include_self=True)

    # NOTE:
    # The values of the properties defined below directly map to properties
    # of the underlying `Token`s. A root node does not translate to a `Token`
    # object, so calling these property getters on a root node will raise an
    # `AttributeError`.
    #
    # There is no mapping for `Token.nesting` because the `is_nested` property
    # provides that data, and can be called on any node type, including root.

    def _attribute_token(self) -> Token:
        """Return the `Token` that is used as the data source for the
        properties defined below."""
        if self.token:
            return self.token
        if self.nester_tokens:
            return self.nester_tokens.opening
        raise AttributeError("Root node does not have the accessed attribute")

    @property
    def tag(self) -> str:
        """html tag name, e.g. \"p\" """
        return self._attribute_token().tag

    @property
    def attrs(self) -> dict[str, str | int | float]:
        """Html attributes."""
        return self._attribute_token().attrs

    def attrGet(self, name: str) -> None | str | int | float:
        """Get the value of attribute `name`, or null if it does not exist."""
        return self._attribute_token().attrGet(name)

    @property
    def map(self) -> tuple[int, int] | None:
        """Source map info. Format: `tuple[ line_begin, line_end ]`"""
        map_ = self._attribute_token().map
        if map_:
            # Type ignore because `Token`s attribute types are not perfect
            return tuple(map_)  # type: ignore
        return None

    @property
    def level(self) -> int:
        """nesting level, the same as `state.level`"""
        return self._attribute_token().level

    @property
    def content(self) -> str:
        """In a case of self-closing tag (code, html, fence, etc.), it
        has contents of this tag."""
        return self._attribute_token().content

    @property
    def markup(self) -> str:
        """'*' or '_' for emphasis, fence string for fence, etc."""
        return self._attribute_token().markup

    @property
    def info(self) -> str:
        """fence infostring"""
        return self._attribute_token().info

    @property
    def meta(self) -> dict[Any, Any]:
        """A place for plugins to store an arbitrary data."""
        return self._attribute_token().meta

    @property
    def block(self) -> bool:
        """True for block-level tokens, false for inline tokens."""
        return self._attribute_token().block

    @property
    def hidden(self) -> bool:
        """If it's true, ignore this element when rendering.
        Used for tight lists to hide paragraphs."""
        return self._attribute_token().hidden
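A minimal sketch (not part of the committed file): building the tree from a parsed token stream, then inspecting it.

from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode

node = SyntaxTreeNode(MarkdownIt().parse("# Heading\n\n*text*"))
print(node.pretty(indent=2, show_text=True))
print([n.type for n in node.walk()])
# e.g. ['root', 'heading', 'inline', 'text', 'paragraph', 'inline', 'em', 'text']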
@@ -0,0 +1,186 @@
from __future__ import annotations

from collections.abc import Callable, Iterable, MutableMapping
from collections.abc import MutableMapping as MutableMappingABC
from pathlib import Path
from typing import TYPE_CHECKING, Any, TypedDict, cast

if TYPE_CHECKING:
    from typing_extensions import NotRequired


EnvType = MutableMapping[str, Any]  # note: could use TypeAlias in python 3.10
"""Type for the environment sandbox used in parsing and rendering,
which stores mutable variables for use by plugins and rules.
"""


class OptionsType(TypedDict):
    """Options for parsing."""

    maxNesting: int
    """Internal protection, recursion limit."""
    html: bool
    """Enable HTML tags in source."""
    linkify: bool
    """Enable autoconversion of URL-like texts to links."""
    typographer: bool
    """Enable smartquotes and replacements."""
    quotes: str
    """Quote characters."""
    xhtmlOut: bool
    """Use '/' to close single tags (<br />)."""
    breaks: bool
    """Convert newlines in paragraphs into <br>."""
    langPrefix: str
    """CSS language prefix for fenced blocks."""
    highlight: Callable[[str, str, str], str] | None
    """Highlighter function: (content, lang, attrs) -> str."""
    store_labels: NotRequired[bool]
    """Store link label in link/image token's metadata (under Token.meta['label']).

    This is a Python only option, and is intended for the use of round-trip parsing.
    """


class PresetType(TypedDict):
    """Preset configuration for markdown-it."""

    options: OptionsType
    """Options for parsing."""
    components: MutableMapping[str, MutableMapping[str, list[str]]]
    """Components for parsing and rendering."""


class OptionsDict(MutableMappingABC):  # type: ignore
    """A dictionary, with attribute access to core markdownit configuration options."""

    # Note: ideally we would probably just remove attribute access entirely,
    # but we keep it for backwards compatibility.

    def __init__(self, options: OptionsType) -> None:
        self._options = cast(OptionsType, dict(options))

    def __getitem__(self, key: str) -> Any:
        return self._options[key]  # type: ignore[literal-required]

    def __setitem__(self, key: str, value: Any) -> None:
        self._options[key] = value  # type: ignore[literal-required]

    def __delitem__(self, key: str) -> None:
        del self._options[key]  # type: ignore

    def __iter__(self) -> Iterable[str]:  # type: ignore
        return iter(self._options)

    def __len__(self) -> int:
        return len(self._options)

    def __repr__(self) -> str:
        return repr(self._options)

    def __str__(self) -> str:
        return str(self._options)

    @property
    def maxNesting(self) -> int:
        """Internal protection, recursion limit."""
        return self._options["maxNesting"]

    @maxNesting.setter
    def maxNesting(self, value: int) -> None:
        self._options["maxNesting"] = value

    @property
    def html(self) -> bool:
        """Enable HTML tags in source."""
        return self._options["html"]

    @html.setter
    def html(self, value: bool) -> None:
        self._options["html"] = value

    @property
    def linkify(self) -> bool:
        """Enable autoconversion of URL-like texts to links."""
        return self._options["linkify"]

    @linkify.setter
    def linkify(self, value: bool) -> None:
        self._options["linkify"] = value

    @property
    def typographer(self) -> bool:
        """Enable smartquotes and replacements."""
        return self._options["typographer"]

    @typographer.setter
    def typographer(self, value: bool) -> None:
        self._options["typographer"] = value

    @property
    def quotes(self) -> str:
        """Quote characters."""
        return self._options["quotes"]

    @quotes.setter
    def quotes(self, value: str) -> None:
        self._options["quotes"] = value

    @property
    def xhtmlOut(self) -> bool:
        """Use '/' to close single tags (<br />)."""
        return self._options["xhtmlOut"]

    @xhtmlOut.setter
    def xhtmlOut(self, value: bool) -> None:
        self._options["xhtmlOut"] = value

    @property
    def breaks(self) -> bool:
        """Convert newlines in paragraphs into <br>."""
        return self._options["breaks"]

    @breaks.setter
    def breaks(self, value: bool) -> None:
        self._options["breaks"] = value

    @property
    def langPrefix(self) -> str:
        """CSS language prefix for fenced blocks."""
        return self._options["langPrefix"]

    @langPrefix.setter
    def langPrefix(self, value: str) -> None:
        self._options["langPrefix"] = value

    @property
    def highlight(self) -> Callable[[str, str, str], str] | None:
        """Highlighter function: (content, langName, langAttrs) -> escaped HTML."""
        return self._options["highlight"]

    @highlight.setter
    def highlight(self, value: Callable[[str, str, str], str] | None) -> None:
        self._options["highlight"] = value


def read_fixture_file(path: str | Path) -> list[list[Any]]:
    text = Path(path).read_text(encoding="utf-8")
    tests = []
    section = 0
    last_pos = 0
    lines = text.splitlines(keepends=True)
    for i in range(len(lines)):
        if lines[i].rstrip() == ".":
            if section == 0:
                tests.append([i, lines[i - 1].strip()])
                section = 1
            elif section == 1:
                tests[-1].append("".join(lines[last_pos + 1 : i]))
                section = 2
            elif section == 2:
                tests[-1].append("".join(lines[last_pos + 1 : i]))
                section = 0

            last_pos = i
    return tests
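A hedged sketch (not part of the committed file): OptionsDict is what MarkdownIt exposes as md.options, so item and attribute access are interchangeable; the highlight hook receives (content, langName, langAttrs).

from markdown_it import MarkdownIt

md = MarkdownIt()
assert md.options["xhtmlOut"] == md.options.xhtmlOut
# assumption: a result starting with "<pre" is returned by the renderer as-is
md.options.highlight = lambda content, lang, attrs: f'<pre class="hl {lang}">{content}</pre>'
print(md.render("```python\nx = 1\n```"))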