/*
 * MD4C: Markdown parser for C
 * (http://github.com/mity/md4c)
 *
 * Copyright (c) 2016-2019 Martin Mitas
 * Copyright (c) 2019 Guillaume Piolat (D translation)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
module commonmarkd.md4c;

import core.stdc.stdlib;
import core.stdc.string;
import core.stdc.stdio;

nothrow:
@nogc:

/* Basic character, size and offset types used throughout the parser API. */
alias MD_CHAR = char;
alias MD_SIZE = uint;
alias MD_OFFSET = uint;

/* Block represents a part of document hierarchy structure like a paragraph
 * or list item.
 */
alias MD_BLOCKTYPE = int;
enum : MD_BLOCKTYPE
{
    /* <body>...</body> */
    MD_BLOCK_DOC = 0,

    /* <blockquote>...</blockquote> */
    MD_BLOCK_QUOTE,

    /* <ul>...</ul>
     * Detail: Structure MD_BLOCK_UL_DETAIL. */
    MD_BLOCK_UL,

    /* <ol>...</ol>
     * Detail: Structure MD_BLOCK_OL_DETAIL. */
    MD_BLOCK_OL,

    /* <li>...</li>
     * Detail: Structure MD_BLOCK_LI_DETAIL. */
    MD_BLOCK_LI,

    /* <hr> */
    MD_BLOCK_HR,

    /* <h1>...</h1> (for levels up to 6)
     * Detail: Structure MD_BLOCK_H_DETAIL. */
    MD_BLOCK_H,

    /* <pre><code>...</code></pre>
     * Note the text lines within code blocks are terminated with '\n'
     * instead of explicit MD_TEXT_BR. */
    MD_BLOCK_CODE,

    /* Raw HTML block. This itself does not correspond to any particular HTML
     * tag. The contents of it _is_ raw HTML source intended to be put
     * in verbatim form to the HTML output. */
    MD_BLOCK_HTML,

    /* <p>...</p> */
    MD_BLOCK_P,

    /* <table>...</table> and its contents.
     * Detail: Structure MD_BLOCK_TD_DETAIL (used with MD_BLOCK_TH and MD_BLOCK_TD)
     * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
    MD_BLOCK_TABLE,
    MD_BLOCK_THEAD,
    MD_BLOCK_TBODY,
    MD_BLOCK_TR,
    MD_BLOCK_TH,
    MD_BLOCK_TD
}

/* Span represents an in-line piece of a document which should be rendered with
 * the same font, color and other attributes. A sequence of spans forms a block
 * like paragraph or list item. */
alias MD_SPANTYPE = int;
enum : MD_SPANTYPE
{
    /* <em>...</em> */
    MD_SPAN_EM,

    /* <strong>...</strong> */
    MD_SPAN_STRONG,

    /* <a href="xxx">...</a>
     * Detail: Structure MD_SPAN_A_DETAIL. */
    MD_SPAN_A,

    /* <img src="xxx">...</img>
     * Detail: Structure MD_SPAN_IMG_DETAIL.
     * Note: Image text can contain nested spans and even nested images.
     * If rendered into ALT attribute of HTML <IMG> tag, it's responsibility
     * of the renderer to deal with it.
     */
    MD_SPAN_IMG,

    /* <code>...</code> */
    MD_SPAN_CODE,

    /* <del>...</del>
     * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
     */
    MD_SPAN_DEL,

    /* For recognizing inline ($) and display ($$) equations
     * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
     */
    MD_SPAN_LATEXMATH,
    MD_SPAN_LATEXMATH_DISPLAY
}

/* Text is the actual textual contents of span. */
alias MD_TEXTTYPE = int;
enum : MD_TEXTTYPE
{
    /* Normal text. */
    MD_TEXT_NORMAL = 0,

    /* null character. CommonMark requires replacing null character with
     * the replacement char U+FFFD, so this allows caller to do that easily. */
    MD_TEXT_NULLCHAR,

    /* Line breaks.
     * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
     * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
    MD_TEXT_BR,         /* <br> (hard break) */
    MD_TEXT_SOFTBR,     /* '\n' in source text where it is not semantically meaningful (soft break) */

    /* Entity.
     * (a) Named entity, e.g. &nbsp;
     *     (Note MD4C does not have a list of known entities.
     *     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
     *     treated as a named entity.)
     * (b) Numerical entity, e.g. &#1234;
     * (c) Hexadecimal entity, e.g. &#x12AB;
     *
     * As MD4C is mostly encoding agnostic, application gets the verbatim
     * entity text into the MD_PARSER::text() callback. */
    MD_TEXT_ENTITY,

    /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
     * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
     * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
     * kind of text. */
    MD_TEXT_CODE,

    /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
     * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
     * The text contains verbatim '\n' for the new lines. */
    MD_TEXT_HTML,

    /* Text is inside an equation. This is processed the same way as inlined code
     * spans (`code`). */
    MD_TEXT_LATEXMATH
}


/* Alignment enumeration. */

alias MD_ALIGN = int;
enum : MD_ALIGN
{
    MD_ALIGN_DEFAULT = 0,   /* When unspecified. */
    MD_ALIGN_LEFT,
    MD_ALIGN_CENTER,
    MD_ALIGN_RIGHT
}


/* String attribute.
 *
 * This wraps strings which are outside of a normal text flow and which are
 * propagated within various detailed structures, but which still may contain
 * string portions of different types like e.g. entities.
 *
 * So, for example, let's consider an image has a title attribute string
 * set to "foo &quot; bar". (Note the string size is 14.)
 *
 * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
 *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
 *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
 *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
 *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
 *
 * Note that these conditions are guaranteed:
 *  -- substr_offsets[0] == 0
 *  -- substr_offsets[LAST+1] == size
 *  -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear.
 */
struct MD_ATTRIBUTE
{
    const (MD_CHAR)* text;
    MD_SIZE size;
    const (MD_TEXTTYPE)* substr_types;
    const (MD_OFFSET)* substr_offsets;
}


/* Detailed info for MD_BLOCK_UL. */
struct MD_BLOCK_UL_DETAIL
{
    int is_tight;           /* Non-zero if tight list, zero if loose. */
    MD_CHAR mark;           /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
}

/* Detailed info for MD_BLOCK_OL. */
struct MD_BLOCK_OL_DETAIL
{
    uint start;             /* Start index of the ordered list. */
    int is_tight;           /* Non-zero if tight list, zero if loose. */
    MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
}

/* Detailed info for MD_BLOCK_LI. */
struct MD_BLOCK_LI_DETAIL
{
    int is_task;                /* Can be non-zero only with MD_FLAG_TASKLISTS */
    MD_CHAR task_mark;          /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
    MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
}

/* Detailed info for MD_BLOCK_H. */
struct MD_BLOCK_H_DETAIL
{
    uint level;             /* Header level (1 - 6) */
}

/* Detailed info for MD_BLOCK_CODE. */
struct MD_BLOCK_CODE_DETAIL
{
    MD_ATTRIBUTE info;
    MD_ATTRIBUTE lang;
    MD_CHAR fence_char;     /* The character used for fenced code block; or zero for indented code block. */
}

/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
struct MD_BLOCK_TD_DETAIL
{
    MD_ALIGN align_;
}

/* Detailed info for MD_SPAN_A. */
struct MD_SPAN_A_DETAIL
{
    MD_ATTRIBUTE href;
    MD_ATTRIBUTE title;
}

/* Detailed info for MD_SPAN_IMG. */
struct MD_SPAN_IMG_DETAIL
{
    MD_ATTRIBUTE src;
    MD_ATTRIBUTE title;
}


/* Flags specifying extensions/deviations from CommonMark specification.
 *
 * By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
 * The following flags may allow some extensions or deviations from it.
 */
enum
{
    MD_FLAG_COLLAPSEWHITESPACE          = 0x0001,  /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
    MD_FLAG_PERMISSIVEATXHEADERS        = 0x0002,  /* Do not require space in ATX headers ( ###header ) */
    MD_FLAG_PERMISSIVEURLAUTOLINKS      = 0x0004,  /* Recognize URLs as autolinks even without '<', '>' */
    MD_FLAG_PERMISSIVEEMAILAUTOLINKS    = 0x0008,  /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
    MD_FLAG_NOINDENTEDCODEBLOCKS        = 0x0010,  /* Disable indented code blocks. (Only fenced code works.) */
    MD_FLAG_NOHTMLBLOCKS                = 0x0020,  /* Disable raw HTML blocks. */
    MD_FLAG_NOHTMLSPANS                 = 0x0040,  /* Disable raw HTML (inline). */
    MD_FLAG_TABLES                      = 0x0100,  /* Enable tables extension. */
    MD_FLAG_STRIKETHROUGH               = 0x0200,  /* Enable strikethrough extension. */
    MD_FLAG_PERMISSIVEWWWAUTOLINKS      = 0x0400,  /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
    MD_FLAG_TASKLISTS                   = 0x0800,  /* Enable task list extension. */
    MD_FLAG_LATEXMATHSPANS              = 0x1000,  /* Enable $ and $$ containing LaTeX equations. */

    MD_FLAG_PERMISSIVEAUTOLINKS         = MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS,
    MD_FLAG_NOHTML                      = MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS,

    /* Convenient sets of flags corresponding to well-known Markdown dialects.
     *
     * Note we may only support subset of features of the referred dialect.
     * The constant just enables those extensions which bring us as close as
     * possible given what features we implement.
     *
     * ABI compatibility note: Meaning of these can change in time as new
     * extensions, bringing the dialect closer to the original, are implemented.
     */
    MD_DIALECT_COMMONMARK               = 0,
    MD_DIALECT_GITHUB                   = (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS),
}

/* Parser structure: dialect flags plus the caller-provided callbacks.
 */
struct MD_PARSER
{
nothrow:
@nogc:
    /* Reserved. Set to zero.
     */
    uint abi_version;

    /* Dialect options. Bitmask of MD_FLAG_xxxx values.
     */
    uint flags;

    /* Caller-provided rendering callbacks.
     *
     * For some block/span types, more detailed information is provided in a
     * type-specific structure pointed by the argument 'detail'.
     *
     * The last argument of all callbacks, 'userdata', is just propagated from
     * md_parse() and is available for any use by the application.
     *
     * Note any strings provided to the callbacks as their arguments or as
     * members of any detail structure are generally not zero-terminated.
     * Application has to take the respective size information into account.
     *
     * Callbacks may abort further parsing of the document by returning non-zero.
     */
    int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_block;
    int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_block;

    int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_span;
    int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_span;

    int function(MD_TEXTTYPE /*type*/, const(MD_CHAR)* /*text*/, MD_SIZE /*size*/, void* /*userdata*/) text;

    /* Debug callback. Optional (may be null).
     *
     * If provided and something goes wrong, this function gets called.
     * This is intended for debugging and problem diagnosis for developers;
     * it is not intended to provide any errors suitable for displaying to an
     * end user.
     */
    void function(const(char)* /*msg*/, void* /*userdata*/) debug_log;

    /* Reserved. Set to null.
     */
    void function() syntax;
}


/*****************************
 ***  Miscellaneous Stuff  ***
 *****************************/


/* Misc. macros. */

enum TRUE = 1;
enum FALSE = 0;


/************************
 ***  Internal Types  ***
 ************************/

/* These are omnipresent so let's save some typing. */
alias CHAR = MD_CHAR;
alias SZ = MD_SIZE;
alias OFF = MD_OFFSET;

/* During analysis of inline marks, we need to manage some "mark chains",
 * of (yet unresolved) openers. This structure holds start/end of the chain.
 * The chain internals are then realized through MD_MARK::prev and ::next.
*/
struct MD_MARKCHAIN
{
    int head;   /* Index of first mark in the chain, or -1 if empty. */
    int tail;   /* Index of last mark in the chain, or -1 if empty. */
}

/* Index range within MD_CTX::mark_chains covering the opener chains
 * (ASTERISK_OPENERS_* through DOLLAR_OPENERS). */
enum OPENERS_CHAIN_FIRST = 2;
enum OPENERS_CHAIN_LAST = 11;

/* Context propagated through all the parsing. */
struct MD_CTX
{
nothrow:
@nogc:

    /* Immutable stuff (parameters of md_parse()). */
    const(CHAR)* text;
    SZ size;
    MD_PARSER parser;
    void* userdata;

    /* When this is true, it allows some optimizations. */
    int doc_ends_with_newline;

    /* Helper temporary growing buffer. */
    CHAR* buffer;
    uint alloc_buffer;

    /* Reference definitions. */
    MD_REF_DEF* ref_defs;
    int n_ref_defs;
    int alloc_ref_defs;
    void** ref_def_hashtable;
    int ref_def_hashtable_size;

    /* Stack of inline/span markers.
     * This is only used for parsing a single block contents but by storing it
     * here we may reuse the stack for subsequent blocks; i.e. we have fewer
     * (re)allocations. */
    MD_MARK* marks;
    int n_marks;
    int alloc_marks;

    ubyte[256] mark_char_map;
    /* For resolving of inline spans. */
    MD_MARKCHAIN[12] mark_chains;

    /* Named accessors for the individual slots of mark_chains. */
    MD_MARKCHAIN* PTR_CHAIN()                         { return &mark_chains[0]; }
    MD_MARKCHAIN* TABLECELLBOUNDARIES()               { return &mark_chains[1]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_0() { return &mark_chains[2]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_1() { return &mark_chains[3]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_2() { return &mark_chains[4]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_0() { return &mark_chains[5]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_1() { return &mark_chains[6]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_2() { return &mark_chains[7]; }
    MD_MARKCHAIN* UNDERSCORE_OPENERS()                { return &mark_chains[8]; }
    MD_MARKCHAIN* TILDE_OPENERS()                     { return &mark_chains[9]; }
    MD_MARKCHAIN* BRACKET_OPENERS()                   { return &mark_chains[10]; }
    MD_MARKCHAIN* DOLLAR_OPENERS()                    { return &mark_chains[11]; }

    int n_table_cell_boundaries;

    /* For resolving links. */
    int unresolved_link_head;
    int unresolved_link_tail;

    /* For resolving raw HTML. */
    OFF html_comment_horizon;
    OFF html_proc_instr_horizon;
    OFF html_decl_horizon;
    OFF html_cdata_horizon;

    /* For block analysis.
     * Notes:
     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
     *      instead of MD_LINE(s).
     */
    void* block_bytes;
    MD_BLOCK* current_block;
    int n_block_bytes;
    int alloc_block_bytes;

    /* For container block analysis. */
    MD_CONTAINER* containers;
    int n_containers;
    int alloc_containers;

    /* Minimal indentation to call the block "indented code block". */
    uint code_indent_offset;

    /* Contextual info for line analysis. */
    SZ code_fence_length;   /* For checking closing fence length. */
    int html_block_type;    /* For checking closing raw HTML condition. */
    int last_line_has_list_loosening_effect;
    int last_list_item_starts_with_two_blank_lines;

    /* Forward a diagnostic message to the optional debug_log callback. */
    void MD_LOG(const(char)* msg)
    {
        if(parser.debug_log != null)
            parser.debug_log(msg, userdata);
    }

    /* Character accessors. */
    CHAR CH(OFF off)
    {
        return text[off];
    }

    const(CHAR)* STR(OFF off)
    {
        return text + off;
    }

    /* Character classification of the input character at offset 'off'. */
    bool ISANYOF(OFF off, const(CHAR)* palette) { return ISANYOF_(CH(off), palette); }
    bool ISANYOF2(OFF off, CHAR ch1, CHAR ch2)  { return ISANYOF2_(CH(off), ch1, ch2); }
    bool ISANYOF3(OFF off, CHAR ch1, CHAR ch2, CHAR ch3) { return ISANYOF3_(CH(off), ch1, ch2, ch3); }
    bool ISASCII(OFF off)                 { return ISASCII_(CH(off)); }
    bool ISBLANK(OFF off)                 { return ISBLANK_(CH(off)); }
    bool ISNEWLINE(OFF off)               { return ISNEWLINE_(CH(off)); }
    bool ISWHITESPACE(OFF off)            { return ISWHITESPACE_(CH(off)); }
    bool ISCNTRL(OFF off)                 { return ISCNTRL_(CH(off)); }
    bool ISPUNCT(OFF off)                 { return ISPUNCT_(CH(off)); }
    bool ISUPPER(OFF off)                 { return ISUPPER_(CH(off)); }
    bool ISLOWER(OFF off)                 { return ISLOWER_(CH(off)); }
    bool ISALPHA(OFF off)                 { return ISALPHA_(CH(off)); }
    bool ISDIGIT(OFF off)                 { return ISDIGIT_(CH(off)); }
    bool ISXDIGIT(OFF off)                { return ISXDIGIT_(CH(off)); }
    bool ISALNUM(OFF off)                 { return ISALNUM_(CH(off)); }
}

alias MD_LINETYPE = int;
enum : MD_LINETYPE
{
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
}

/* Result of analysing a single line. The line type and its associated data
 * are stored in 16-bit fields and exposed through the type()/data()
 * accessors, which narrow on write and widen on read. */
struct MD_LINE_ANALYSIS
{
nothrow:
@nogc:
    short type_;
    ushort data_;

    MD_LINETYPE type() const
    {
        return type_;
    }

    void type(MD_LINETYPE value)
    {
        type_ = cast(short)value;
    }

    int data() const
    {
        return data_;
    }

    void data(uint value)
    {
        data_ = cast(ushort)value;
    }

    OFF beg;
    OFF end;
    uint indent;        /* Indentation level. */
}

struct MD_LINE
{
    OFF beg;
    OFF end;
}

struct MD_VERBATIMLINE
{
    OFF beg;
    OFF end;
    OFF indent;
}


/*****************
 ***  Helpers  ***
 *****************/

pure
{
    /* Character classification.
     * Note we assume ASCII compatibility of code points < 128 here. */
    bool ISIN_(CHAR ch, CHAR ch_min, CHAR ch_max)
    {
        return (ch_min <= cast(uint)(ch) && cast(uint)(ch) <= ch_max);
    }

    /* 'palette' must be a zero-terminated string (it is scanned by md_strchr). */
    bool ISANYOF_(CHAR ch, const(CHAR)* palette)
    {
        return md_strchr(palette, ch) != null;
    }

    bool ISANYOF2_(CHAR ch, CHAR ch1, CHAR ch2)
    {
        return (ch == ch1) || (ch == ch2);
    }

    bool ISANYOF3_(CHAR ch, CHAR ch1, CHAR ch2, CHAR ch3)
    {
        return (ch == ch1) || (ch == ch2) || (ch == ch3);
    }

    bool ISASCII_(CHAR ch)
    {
        return (cast(uint)ch) <= 127;
    }

    bool ISBLANK_(CHAR ch)
    {
        return ISANYOF2_(ch, ' ', '\t');
    }

    bool ISNEWLINE_(CHAR ch)
    {
        return ISANYOF2_(ch, '\r', '\n');
    }

    bool ISWHITESPACE_(CHAR ch)
    {
        return ISBLANK_(ch) || ISANYOF2_(ch, '\v', '\f');
    }

    bool ISCNTRL_(CHAR ch)
    {
        return (cast(uint)(ch) <= 31 || cast(uint)(ch) == 127);
    }

    bool ISPUNCT_(CHAR ch)
    {
        return ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126);
    }

    bool ISUPPER_(CHAR ch)
    {
        return ISIN_(ch, 'A', 'Z');
    }

    bool ISLOWER_(CHAR ch)
    {
        return ISIN_(ch, 'a', 'z');
    }

    bool ISALPHA_(CHAR ch)
    {
        return ISUPPER_(ch) || ISLOWER_(ch);
    }

    bool ISDIGIT_(CHAR ch)
    {
        return ISIN_(ch, '0', '9');
    }

    bool ISXDIGIT_(CHAR ch)
    {
        return ISDIGIT_(ch) || ISIN_(ch, 'A', 'F') || ISIN_(ch, 'a', 'f');
    }

    bool ISALNUM_(CHAR ch)
    {
        return ISALPHA_(ch) || ISDIGIT_(ch);
    }
}

/* Find the first occurrence of 'ch' in the zero-terminated string 'str'.
 * Returns a pointer to it, or null if not present. The terminator itself is
 * never matched. */
const(CHAR)* md_strchr(const(CHAR)* str, CHAR ch) pure
{
    OFF i;
    for(i = 0; str[i] != '\0'; i++) {
        if(ch == str[i])
            return (str + i);
    }
    return null;
}

/* Case insensitive check of string equality.
 * Compares the first 'n' characters; only ASCII letters are case-folded.
 * Returns TRUE or FALSE. */
int md_ascii_case_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
    OFF i;
    for(i = 0; i < n; i++) {
        CHAR ch1 = s1[i];
        CHAR ch2 = s2[i];

        if(ISLOWER_(ch1))
            ch1 += ('A'-'a');
        if(ISLOWER_(ch2))
            ch2 += ('A'-'a');
        if(ch1 != ch2)
            return FALSE;
    }
    return TRUE;
}

/* Case sensitive check of equality of the first 'n' characters. */
int md_ascii_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
    return memcmp(s1, s2, n * CHAR.sizeof) == 0;
}

/* Send 'str' (of length 'size') to the text callback, splitting it at NUL
 * characters. Every run of non-NUL characters is reported with the given
 * 'type'; every NUL is reported separately as MD_TEXT_NULLCHAR so that the
 * application can substitute a replacement character.
 *
 * Returns 0 on success, or the first non-zero value returned by the callback.
 */
int md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const(CHAR)* str, SZ size)
{
    OFF off = 0;
    int ret = 0;

    while(1) {
        /* Find the end of the current run of non-NUL characters. */
        while(off < size  &&  str[off] != '\0')
            off++;

        if(off > 0) {
            ret = ctx.parser.text(type, str, off, ctx.userdata);
            if(ret != 0)
                return ret;

            str += off;
            size -= off;
            off = 0;
        }

        if(off >= size)
            return 0;

        /* Here str[0] is a NUL character. */
        ret = ctx.parser.text(MD_TEXT_NULLCHAR, "", 1, ctx.userdata);
        if(ret != 0)
            return ret;

        /* Consume the NUL itself. Merely advancing 'off' would keep 'str'
         * pointing at the NUL, so the next run handed to the callback would
         * start with the very character we have just reported. */
        str++;
        size--;
    }
}

/* Make sure ctx.buffer can hold at least 'sz' characters, growing it with
 * realloc() when needed. Returns 0 on success, -1 on failure (the existing
 * buffer is left untouched in that case). */
int MD_TEMP_BUFFER(MD_CTX* ctx, SZ sz)
{
    if(sz > ctx.alloc_buffer)
    {
        CHAR* new_buffer;
        SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;
        if(new_size < sz)
        {
            /* The 32-bit size computation wrapped around; allocating the
             * wrapped size would yield a buffer smaller than requested. */
            ctx.MD_LOG("Temporary buffer size overflow.");
            return -1;
        }
        new_buffer = cast(CHAR*) realloc(ctx.buffer, new_size);
        if (new_buffer == null)
        {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        ctx.buffer = new_buffer;
        ctx.alloc_buffer = new_size;
    }
    return 0;
}

/* Invoke the enter_block callback; log and propagate a non-zero result. */
int MD_ENTER_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
    int ret = ctx.parser.enter_block(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from enter_block() callback.");
        return ret;
    }
    return 0;
}

/* Invoke the leave_block callback; log and propagate a non-zero result. */
int MD_LEAVE_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
    int ret = ctx.parser.leave_block(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from leave_block() callback.");
        return ret;
    }
    return 0;
}

/* Invoke the enter_span callback; log and propagate a non-zero result. */
int MD_ENTER_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
    int ret = ctx.parser.enter_span(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from enter_span() callback.");
        return ret;
    }
    return 0;
}

/* Invoke the leave_span callback; log and propagate a non-zero result. */
int MD_LEAVE_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
    int ret = ctx.parser.leave_span(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from leave_span() callback.");
        return ret;
    }
    return 0;
}

/* Invoke the text callback for a non-empty string; an empty string is
 * silently skipped. Logs and propagates a non-zero callback result. */
int MD_TEXT(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
    if(size > 0)
    {
        int ret = ctx.parser.text((type), (str), (size), ctx.userdata);
        if (ret != 0)
        {
            ctx.MD_LOG("Aborted from text() callback.");
            return ret;
        }
    }
    return 0;
}

/* Like MD_TEXT(), but for text which may contain NUL characters: those are
 * reported separately via md_text_with_null_replacement(). */
int MD_TEXT_INSECURE(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
    if(size > 0)
    {
        int ret = md_text_with_null_replacement(ctx, type, str, size);
        if(ret != 0)
        {
            ctx.MD_LOG("Aborted from text() callback.");
            return ret;
        }
    }
    return 0;
}

/*************************
 ***  Unicode Support  ***
 *************************/

struct MD_UNICODE_FOLD_INFO
{
    uint[3] codepoints;
    int n_codepoints;
};



/* Binary search over sorted "map" of codepoints. Consecutive sequences
 * of codepoints may be encoded in the map by just using the
 * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000).
 *
 * Returns index of the found record in the map (in the case of ranges,
 * the minimal value is used); or -1 on failure.
*/
int md_unicode_bsearch__(uint codepoint, const(uint)* map, size_t map_size)
{
    int lo = 0;
    int hi = cast(int) map_size - 1;

    while(lo <= hi)
    {
        immutable int mid = (lo + hi) / 2;

        /* The probed record may be either end of a range, not just a single
         * value: widen it to [first, last] so both ends are covered. */
        int first = mid;
        int last = mid;
        if(map[mid] & 0x40000000)
            last = mid + 1;     /* Probe is a range start; its end follows. */
        if(map[mid] & 0x80000000)
            first = mid - 1;    /* Probe is a range end; its start precedes. */

        immutable uint lowest = map[first] & 0x00ffffff;
        immutable uint highest = map[last] & 0x00ffffff;

        if(codepoint < lowest)
            hi = first - 1;
        else if(codepoint > highest)
            lo = last + 1;
        else
            return first;
    }

    return -1;
}

/* Tell whether 'codepoint' counts as whitespace: ASCII codepoints are
 * classified by ISWHITESPACE_(), anything above by a lookup in the table
 * of Unicode "Zs" codepoints. */
bool md_is_unicode_whitespace__(uint codepoint)
{
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py) */
    static immutable uint[] WHITESPACE_MAP =
    [
        0x0020,
        0x00a0,
        0x1680,
        0x2000 | 0x40000000, 0x200a | 0x80000000,
        0x202f,
        0x205f,
        0x3000
    ];

    /* The ASCII ones are the most frequently used ones, also CommonMark
     * specification requests few more in this range. */
    immutable bool is_ascii = (codepoint <= 0x7f);
    if(is_ascii)
        return ISWHITESPACE_(cast(CHAR)codepoint);

    immutable int index = md_unicode_bsearch__(codepoint, WHITESPACE_MAP.ptr, WHITESPACE_MAP.length);
    return index >= 0;
}

bool md_is_unicode_punct__(uint codepoint)
{
    /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
888 * (generated by scripts/build_punct_map.py) */ 889 static immutable uint[] PUNCT_MAP = 890 [ 891 0x0021 | 0x40000000,0x0023 | 0x80000000, 0x0025 | 0x40000000,0x002a | 0x80000000, 0x002c | 0x40000000,0x002f | 0x80000000, 0x003a | 0x40000000,0x003b | 0x80000000, 0x003f | 0x40000000,0x0040 | 0x80000000, 892 0x005b | 0x40000000,0x005d | 0x80000000, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00a7, 0x00ab, 0x00b6 | 0x40000000,0x00b7 | 0x80000000, 893 0x00bb, 0x00bf, 0x037e, 0x0387, 0x055a | 0x40000000,0x055f | 0x80000000, 0x0589 | 0x40000000,0x058a | 0x80000000, 0x05be, 0x05c0, 894 0x05c3, 0x05c6, 0x05f3 | 0x40000000,0x05f4 | 0x80000000, 0x0609 | 0x40000000,0x060a | 0x80000000, 0x060c | 0x40000000,0x060d | 0x80000000, 0x061b, 0x061e | 0x40000000,0x061f | 0x80000000, 895 0x066a | 0x40000000,0x066d | 0x80000000, 0x06d4, 0x0700 | 0x40000000,0x070d | 0x80000000, 0x07f7 | 0x40000000,0x07f9 | 0x80000000, 0x0830 | 0x40000000,0x083e | 0x80000000, 0x085e, 896 0x0964 | 0x40000000,0x0965 | 0x80000000, 0x0970, 0x09fd, 0x0a76, 0x0af0, 0x0c77, 0x0c84, 0x0df4, 0x0e4f, 897 0x0e5a | 0x40000000,0x0e5b | 0x80000000, 0x0f04 | 0x40000000,0x0f12 | 0x80000000, 0x0f14, 0x0f3a | 0x40000000,0x0f3d | 0x80000000, 0x0f85, 0x0fd0 | 0x40000000,0x0fd4 | 0x80000000, 898 0x0fd9 | 0x40000000,0x0fda | 0x80000000, 0x104a | 0x40000000,0x104f | 0x80000000, 0x10fb, 0x1360 | 0x40000000,0x1368 | 0x80000000, 0x1400, 0x166e, 0x169b | 0x40000000,0x169c | 0x80000000, 899 0x16eb | 0x40000000,0x16ed | 0x80000000, 0x1735 | 0x40000000,0x1736 | 0x80000000, 0x17d4 | 0x40000000,0x17d6 | 0x80000000, 0x17d8 | 0x40000000,0x17da | 0x80000000, 0x1800 | 0x40000000,0x180a | 0x80000000, 900 0x1944 | 0x40000000,0x1945 | 0x80000000, 0x1a1e | 0x40000000,0x1a1f | 0x80000000, 0x1aa0 | 0x40000000,0x1aa6 | 0x80000000, 0x1aa8 | 0x40000000,0x1aad | 0x80000000, 0x1b5a | 0x40000000,0x1b60 | 0x80000000, 901 0x1bfc | 0x40000000,0x1bff | 0x80000000, 0x1c3b | 0x40000000,0x1c3f | 0x80000000, 0x1c7e | 0x40000000,0x1c7f | 0x80000000, 0x1cc0 | 
0x40000000,0x1cc7 | 0x80000000, 0x1cd3, 0x2010 | 0x40000000,0x2027 | 0x80000000, 902 0x2030 | 0x40000000,0x2043 | 0x80000000, 0x2045 | 0x40000000,0x2051 | 0x80000000, 0x2053 | 0x40000000,0x205e | 0x80000000, 0x207d | 0x40000000,0x207e | 0x80000000, 0x208d | 0x40000000,0x208e | 0x80000000, 903 0x2308 | 0x40000000,0x230b | 0x80000000, 0x2329 | 0x40000000,0x232a | 0x80000000, 0x2768 | 0x40000000,0x2775 | 0x80000000, 0x27c5 | 0x40000000,0x27c6 | 0x80000000, 0x27e6 | 0x40000000,0x27ef | 0x80000000, 904 0x2983 | 0x40000000,0x2998 | 0x80000000, 0x29d8 | 0x40000000,0x29db | 0x80000000, 0x29fc | 0x40000000,0x29fd | 0x80000000, 0x2cf9 | 0x40000000,0x2cfc | 0x80000000, 0x2cfe | 0x40000000,0x2cff | 0x80000000, 0x2d70, 905 0x2e00 | 0x40000000,0x2e2e | 0x80000000, 0x2e30 | 0x40000000,0x2e4f | 0x80000000, 0x3001 | 0x40000000,0x3003 | 0x80000000, 0x3008 | 0x40000000,0x3011 | 0x80000000, 0x3014 | 0x40000000,0x301f | 0x80000000, 0x3030, 906 0x303d, 0x30a0, 0x30fb, 0xa4fe | 0x40000000,0xa4ff | 0x80000000, 0xa60d | 0x40000000,0xa60f | 0x80000000, 0xa673, 0xa67e, 907 0xa6f2 | 0x40000000,0xa6f7 | 0x80000000, 0xa874 | 0x40000000,0xa877 | 0x80000000, 0xa8ce | 0x40000000,0xa8cf | 0x80000000, 0xa8f8 | 0x40000000,0xa8fa | 0x80000000, 0xa8fc, 0xa92e | 0x40000000,0xa92f | 0x80000000, 908 0xa95f, 0xa9c1 | 0x40000000,0xa9cd | 0x80000000, 0xa9de | 0x40000000,0xa9df | 0x80000000, 0xaa5c | 0x40000000,0xaa5f | 0x80000000, 0xaade | 0x40000000,0xaadf | 0x80000000, 0xaaf0 | 0x40000000,0xaaf1 | 0x80000000, 909 0xabeb, 0xfd3e | 0x40000000,0xfd3f | 0x80000000, 0xfe10 | 0x40000000,0xfe19 | 0x80000000, 0xfe30 | 0x40000000,0xfe52 | 0x80000000, 0xfe54 | 0x40000000,0xfe61 | 0x80000000, 0xfe63, 0xfe68, 910 0xfe6a | 0x40000000,0xfe6b | 0x80000000, 0xff01 | 0x40000000,0xff03 | 0x80000000, 0xff05 | 0x40000000,0xff0a | 0x80000000, 0xff0c | 0x40000000,0xff0f | 0x80000000, 0xff1a | 0x40000000,0xff1b | 0x80000000, 911 0xff1f | 0x40000000,0xff20 | 0x80000000, 0xff3b | 0x40000000,0xff3d | 0x80000000, 0xff3f, 0xff5b, 
0xff5d, 0xff5f | 0x40000000,0xff65 | 0x80000000, 0x10100 | 0x40000000,0x10102 | 0x80000000, 912 0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a50 | 0x40000000,0x10a58 | 0x80000000, 0x10a7f, 913 0x10af0 | 0x40000000,0x10af6 | 0x80000000, 0x10b39 | 0x40000000,0x10b3f | 0x80000000, 0x10b99 | 0x40000000,0x10b9c | 0x80000000, 0x10f55 | 0x40000000,0x10f59 | 0x80000000, 0x11047 | 0x40000000,0x1104d | 0x80000000, 914 0x110bb | 0x40000000,0x110bc | 0x80000000, 0x110be | 0x40000000,0x110c1 | 0x80000000, 0x11140 | 0x40000000,0x11143 | 0x80000000, 0x11174 | 0x40000000,0x11175 | 0x80000000, 0x111c5 | 0x40000000,0x111c8 | 0x80000000, 915 0x111cd, 0x111db, 0x111dd | 0x40000000,0x111df | 0x80000000, 0x11238 | 0x40000000,0x1123d | 0x80000000, 0x112a9, 0x1144b | 0x40000000,0x1144f | 0x80000000, 916 0x1145b, 0x1145d, 0x114c6, 0x115c1 | 0x40000000,0x115d7 | 0x80000000, 0x11641 | 0x40000000,0x11643 | 0x80000000, 0x11660 | 0x40000000,0x1166c | 0x80000000, 917 0x1173c | 0x40000000,0x1173e | 0x80000000, 0x1183b, 0x119e2, 0x11a3f | 0x40000000,0x11a46 | 0x80000000, 0x11a9a | 0x40000000,0x11a9c | 0x80000000, 0x11a9e | 0x40000000,0x11aa2 | 0x80000000, 918 0x11c41 | 0x40000000,0x11c45 | 0x80000000, 0x11c70 | 0x40000000,0x11c71 | 0x80000000, 0x11ef7 | 0x40000000,0x11ef8 | 0x80000000, 0x11fff, 0x12470 | 0x40000000,0x12474 | 0x80000000, 919 0x16a6e | 0x40000000,0x16a6f | 0x80000000, 0x16af5, 0x16b37 | 0x40000000,0x16b3b | 0x80000000, 0x16b44, 0x16e97 | 0x40000000,0x16e9a | 0x80000000, 0x16fe2, 920 0x1bc9f, 0x1da87 | 0x40000000,0x1da8b | 0x80000000, 0x1e95e | 0x40000000,0x1e95f | 0x80000000 921 ]; 922 923 /* The ASCII ones are the most frequently used ones, also CommonMark 924 * specification requests few more in this range. 
*/ 925 if(codepoint <= 0x7f) 926 return ISPUNCT_(cast(CHAR)codepoint); 927 928 return (md_unicode_bsearch__(codepoint, PUNCT_MAP.ptr, PUNCT_MAP.length) >= 0); 929 } 930 931 void md_get_unicode_fold_info(uint codepoint, MD_UNICODE_FOLD_INFO* info) 932 { 933 /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. 934 * (generated by scripts/build_punct_map.py) */ 935 static immutable uint[] FOLD_MAP_1 = 936 [ 937 0x0041 | 0x40000000, 0x005a | 0x80000000, 0x00b5, 0x00c0 | 0x40000000, 0x00d6 | 0x80000000, 0x00d8 | 0x40000000, 0x00de | 0x80000000, 0x0100 | 0x40000000, 0x012e | 0x80000000, 0x0132 | 0x40000000, 0x0136 | 0x80000000, 938 0x0139 | 0x40000000, 0x0147 | 0x80000000, 0x014a | 0x40000000, 0x0176 | 0x80000000, 0x0178, 0x0179 | 0x40000000, 0x017d | 0x80000000, 0x017f, 0x0181, 0x0182, 939 0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x018f, 0x0190, 0x0191, 0x0193, 940 0x0194, 0x0196, 0x0197, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0 | 0x40000000, 0x01a4 | 0x80000000, 0x01a6, 941 0x01a7, 0x01a9, 0x01ac, 0x01ae, 0x01af, 0x01b1, 0x01b3, 0x01b7, 0x01b8, 942 0x01bc, 0x01c4, 0x01c5, 0x01c7, 0x01c8, 0x01ca, 0x01cb | 0x40000000, 0x01db | 0x80000000, 0x01de | 0x40000000, 0x01ee | 0x80000000, 943 0x01f1, 0x01f2, 0x01f6, 0x01f7, 0x01f8 | 0x40000000, 0x021e | 0x80000000, 0x0220, 0x0222 | 0x40000000, 0x0232 | 0x80000000, 0x023a, 944 0x023b, 0x023d, 0x023e, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246 | 0x40000000, 0x024e | 0x80000000, 0x0345, 945 0x0370, 0x0376, 0x037f, 0x0386, 0x0388 | 0x40000000, 0x038a | 0x80000000, 0x038c, 0x038e, 0x0391 | 0x40000000, 0x03a1 | 0x80000000, 946 0x03a3 | 0x40000000, 0x03ab | 0x80000000, 0x03c2, 0x03cf, 0x03d0, 0x03d1, 0x03d5, 0x03d6, 0x03d8 | 0x40000000, 0x03ee | 0x80000000, 947 0x03f0, 0x03f1, 0x03f4, 0x03f5, 0x03f7, 0x03f9, 0x03fa, 0x03fd | 0x40000000, 0x03ff | 0x80000000, 948 0x0400 | 0x40000000, 0x040f | 0x80000000, 0x0410 | 0x40000000, 0x042f | 0x80000000, 0x0460 | 0x40000000, 0x0480 | 0x80000000, 0x048a | 0x40000000, 0x04be | 0x80000000, 
0x04c0, 0x04c1 | 0x40000000, 0x04cd | 0x80000000, 949 0x04d0 | 0x40000000, 0x052e | 0x80000000, 0x0531 | 0x40000000, 0x0556 | 0x80000000, 0x10a0 | 0x40000000, 0x10c5 | 0x80000000, 0x10c7, 0x10cd, 0x13f8 | 0x40000000, 0x13fd | 0x80000000, 0x1c80, 950 0x1c81, 0x1c82, 0x1c83, 0x1c85, 0x1c86, 0x1c87, 0x1c88, 0x1c90 | 0x40000000, 0x1cba | 0x80000000, 951 0x1cbd | 0x40000000, 0x1cbf | 0x80000000, 0x1e00 | 0x40000000, 0x1e94 | 0x80000000, 0x1e9b, 0x1ea0 | 0x40000000, 0x1efe | 0x80000000, 0x1f08 | 0x40000000, 0x1f0f | 0x80000000, 0x1f18 | 0x40000000, 0x1f1d | 0x80000000, 952 0x1f28 | 0x40000000, 0x1f2f | 0x80000000, 0x1f38 | 0x40000000, 0x1f3f | 0x80000000, 0x1f48 | 0x40000000, 0x1f4d | 0x80000000, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 953 0x1f68 | 0x40000000, 0x1f6f | 0x80000000, 0x1fb8, 0x1fba, 0x1fbe, 0x1fc8 | 0x40000000, 0x1fcb | 0x80000000, 0x1fd8, 0x1fda, 0x1fe8, 954 0x1fea, 0x1fec, 0x1ff8, 0x1ffa, 0x2126, 0x212a, 0x212b, 0x2132, 0x2160 | 0x40000000, 0x216f | 0x80000000, 955 0x2183, 0x24b6 | 0x40000000, 0x24cf | 0x80000000, 0x2c00 | 0x40000000, 0x2c2e | 0x80000000, 0x2c60, 0x2c62, 0x2c63, 0x2c64, 956 0x2c67 | 0x40000000, 0x2c6b | 0x80000000, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, 0x2c72, 0x2c75, 0x2c7e, 957 0x2c80 | 0x40000000, 0x2ce2 | 0x80000000, 0x2ceb, 0x2cf2, 0xa640 | 0x40000000, 0xa66c | 0x80000000, 0xa680 | 0x40000000, 0xa69a | 0x80000000, 0xa722 | 0x40000000, 0xa72e | 0x80000000, 958 0xa732 | 0x40000000, 0xa76e | 0x80000000, 0xa779, 0xa77d, 0xa77e | 0x40000000, 0xa786 | 0x80000000, 0xa78b, 0xa78d, 0xa790, 959 0xa796 | 0x40000000, 0xa7a8 | 0x80000000, 0xa7aa, 0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7b0, 0xa7b1, 0xa7b2, 960 0xa7b3, 0xa7b4 | 0x40000000, 0xa7be | 0x80000000, 0xa7c2, 0xa7c4, 0xa7c5, 0xa7c6, 0xab70 | 0x40000000, 0xabbf | 0x80000000, 961 0xff21 | 0x40000000, 0xff3a | 0x80000000, 0x10400 | 0x40000000, 0x10427 | 0x80000000, 0x104b0 | 0x40000000, 0x104d3 | 0x80000000, 0x10c80 | 0x40000000, 0x10cb2 | 0x80000000, 0x118a0 | 0x40000000, 0x118bf | 0x80000000, 962 0x16e40 | 
0x40000000, 0x16e5f | 0x80000000, 0x1e900 | 0x40000000, 0x1e921 | 0x80000000 963 ]; 964 965 static immutable uint[] FOLD_MAP_1_DATA = 966 [ 967 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, 968 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0254, 0x0188, 0x0256, 0x018c, 0x01dd, 969 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, 0x01a1, 0x01a5, 970 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x01b4, 0x0292, 0x01b9, 0x01bd, 0x01c6, 0x01c6, 971 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, 0x0195, 0x01bf, 0x01f9, 0x021f, 972 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 973 0x03b9, 0x0371, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, 0x03cc, 0x03cd, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 974 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 975 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 976 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 977 0x0434, 0x043e, 0x0441, 0x0442, 0x044a, 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 978 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 979 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f67, 0x1fb0, 0x1f70, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1f76, 980 0x1fe0, 0x1f7a, 0x1fe5, 0x1f78, 0x1f7c, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, 0x217f, 0x2184, 0x24d0, 981 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, 0x0271, 0x0250, 0x0252, 982 0x2c73, 0x2c76, 0x023f, 0x2c81, 0x2ce3, 0x2cec, 0x2cf3, 0xa641, 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 983 0xa733, 0xa76f, 0xa77a, 0x1d79, 0xa77f, 0xa787, 0xa78c, 0x0265, 0xa791, 0xa797, 0xa7a9, 0x0266, 0x025c, 984 0x0261, 0x026c, 
0x026a, 0x029e, 0x0287, 0x029d, 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 985 0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 986 0x16e60, 0x16e7f, 0x1e922, 0x1e943 987 ]; 988 989 static immutable uint[] FOLD_MAP_2 = 990 [ 991 0x00df, 0x0130, 0x0149, 0x01f0, 0x0587, 0x1e96, 0x1e97, 0x1e98, 0x1e99, 992 0x1e9a, 0x1e9e, 0x1f50, 0x1f80 | 0x40000000, 0x1f87 | 0x80000000, 0x1f88 | 0x40000000, 0x1f8f | 0x80000000, 0x1f90 | 0x40000000, 0x1f97 | 0x80000000, 0x1f98 | 0x40000000, 0x1f9f | 0x80000000, 993 0x1fa0 | 0x40000000, 0x1fa7 | 0x80000000, 0x1fa8 | 0x40000000, 0x1faf | 0x80000000, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fbc, 0x1fc2, 994 0x1fc3, 0x1fc4, 0x1fc6, 0x1fcc, 0x1fd6, 0x1fe4, 0x1fe6, 0x1ff2, 0x1ff3, 995 0x1ff4, 0x1ff6, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb05, 0xfb06, 0xfb13, 996 0xfb14, 0xfb15, 0xfb16, 0xfb17 997 ]; 998 999 static immutable uint[] FOLD_MAP_2_DATA = 1000 [ 1001 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308, 1002 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9, 1003 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9, 1004 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342, 1005 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342, 1006 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9, 1007 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565, 1008 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d 1009 ]; 1010 1011 static immutable uint[] FOLD_MAP_3 = 1012 [ 1013 0x0390, 0x03b0, 0x1f52, 0x1f54, 0x1f56, 0x1fb7, 0x1fc7, 0x1fd2, 0x1fd3, 1014 0x1fd7, 0x1fe2, 0x1fe3, 0x1fe7, 0x1ff7, 0xfb03, 0xfb04 1015 ]; 1016 1017 static 
immutable uint[] FOLD_MAP_3_DATA = 1018 [ 1019 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301, 1020 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300, 1021 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301, 1022 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c 1023 ]; 1024 1025 static struct FOLD_MAP 1026 { 1027 const(uint)* map; 1028 const(uint)* data; 1029 size_t map_size; 1030 int n_codepoints; 1031 } 1032 1033 /*static immutable*/ FOLD_MAP[3] FOLD_MAP_LIST = 1034 [ 1035 FOLD_MAP(FOLD_MAP_1.ptr, FOLD_MAP_1_DATA.ptr, FOLD_MAP_1.length, 1), 1036 FOLD_MAP(FOLD_MAP_2.ptr, FOLD_MAP_2_DATA.ptr, FOLD_MAP_2.length, 2), 1037 FOLD_MAP(FOLD_MAP_3.ptr, FOLD_MAP_3_DATA.ptr, FOLD_MAP_3.length, 3), 1038 ]; 1039 1040 int i; 1041 1042 /* Fast path for ASCII characters. */ 1043 if(codepoint <= 0x7f) { 1044 info.codepoints[0] = codepoint; 1045 if(ISUPPER_(cast(CHAR)codepoint)) 1046 info.codepoints[0] += 'a' - 'A'; 1047 info.n_codepoints = 1; 1048 return; 1049 } 1050 1051 /* Try to locate the codepoint in any of the maps. */ 1052 for(i = 0; i < cast(int) (FOLD_MAP_LIST.length); i++) { 1053 int index; 1054 1055 index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size); 1056 if(index >= 0) { 1057 /* Found the mapping. */ 1058 int n_codepoints = FOLD_MAP_LIST[i].n_codepoints; 1059 const uint* map = FOLD_MAP_LIST[i].map; 1060 const uint* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints); 1061 1062 memcpy(info.codepoints.ptr, codepoints, uint.sizeof * n_codepoints); 1063 info.n_codepoints = n_codepoints; 1064 1065 if(FOLD_MAP_LIST[i].map[index] != codepoint) { 1066 /* The found mapping maps whole range of codepoints, 1067 * i.e. we have to offset info.codepoints[0] accordingly. */ 1068 if((map[index] & 0x00ffffff)+1 == codepoints[0]) { 1069 /* Alternating type of the range. 
*/
                    info.codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
                } else {
                    /* Range to range kind of mapping. */
                    info.codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
                }
            }

            return;
        }
    }

    /* No mapping found. Map the codepoint to itself. */
    info.codepoints[0] = codepoint;
    info.n_codepoints = 1;
}


/* True if 'ch' is a one-byte UTF-8 sequence (0x00..0x7f). */
bool IS_UTF8_LEAD1(CHAR ch)
{
    const ubyte unit = cast(ubyte) ch;
    return unit <= 0x7f;
}

/* True if 'ch' has the bit pattern 110xxxxx (lead byte of a 2-byte sequence). */
bool IS_UTF8_LEAD2(CHAR ch)
{
    const ubyte unit = cast(ubyte) ch;
    return (unit & 0xe0) == 0xc0;
}

/* True if 'ch' has the bit pattern 1110xxxx (lead byte of a 3-byte sequence). */
bool IS_UTF8_LEAD3(CHAR ch)
{
    const ubyte unit = cast(ubyte) ch;
    return (unit & 0xf0) == 0xe0;
}

/* True if 'ch' has the bit pattern 11110xxx (lead byte of a 4-byte sequence). */
bool IS_UTF8_LEAD4(CHAR ch)
{
    const ubyte unit = cast(ubyte) ch;
    return (unit & 0xf8) == 0xf0;
}

/* True if 'ch' has the bit pattern 10xxxxxx (continuation byte). */
bool IS_UTF8_TAIL(CHAR ch)
{
    const ubyte unit = cast(ubyte) ch;
    return (unit & 0xc0) == 0x80;
}

/* Decode the UTF-8 sequence starting at str[0].
 *
 * At most 'str_size' bytes are inspected. If 'p_size' is not null, it receives
 * the number of bytes the decoded sequence occupies. A lead byte which is not
 * followed by the expected continuation bytes (or which is cut by 'str_size')
 * is returned as-is and reported as a 1-byte sequence.
 */
uint md_decode_utf8__(const(CHAR)* str, SZ str_size, SZ* p_size)
{
    const CHAR lead = str[0];

    /* Fallback result: the raw first byte, one byte long. */
    SZ seq_size = 1;
    uint decoded = cast(uint) lead;

    if(IS_UTF8_LEAD2(lead)) {
        if(1 < str_size && IS_UTF8_TAIL(str[1])) {
            seq_size = 2;
            decoded = ((cast(uint)lead & 0x1f) << 6) |
                      ((cast(uint)str[1] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD3(lead)) {
        if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
            seq_size = 3;
            decoded = ((cast(uint)lead & 0x0f) << 12) |
                      ((cast(uint)str[1] & 0x3f) << 6) |
                      ((cast(uint)str[2] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD4(lead)) {
        if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
            seq_size = 4;
            decoded = ((cast(uint)lead & 0x07) << 18) |
                      ((cast(uint)str[1] & 0x3f) << 12) |
                      ((cast(uint)str[2] & 0x3f) << 6) |
                      ((cast(uint)str[3] & 0x3f) << 0);
        }
    }

    if(p_size != null)
        *p_size = seq_size;
    return decoded;
}

uint
md_decode_utf8_before__(MD_CTX* ctx, OFF off)
{
    /* Decode the UTF-8 sequence which ends right before offset 'off'.
     * If no well-formed multi-byte sequence ends there, the single byte
     * at off-1 is returned as-is. */
    const last = ctx.CH(off-1);

    /* Only a continuation byte can terminate a multi-byte sequence. */
    if(IS_UTF8_TAIL(last)) {
        if(off > 1 && IS_UTF8_LEAD2(ctx.CH(off-2)))
            return ((cast(uint)ctx.CH(off-2) & 0x1f) << 6) |
                   ((cast(uint)last & 0x3f) << 0);

        if(off > 2 && IS_UTF8_LEAD3(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)))
            return ((cast(uint)ctx.CH(off-3) & 0x0f) << 12) |
                   ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
                   ((cast(uint)last & 0x3f) << 0);

        if(off > 3 && IS_UTF8_LEAD4(ctx.CH(off-4)) && IS_UTF8_TAIL(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)))
            return ((cast(uint)ctx.CH(off-4) & 0x07) << 18) |
                   ((cast(uint)ctx.CH(off-3) & 0x3f) << 12) |
                   ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
                   ((cast(uint)last & 0x3f) << 0);
    }

    return cast(uint) last;
}

/* Whitespace test for an already decoded codepoint. */
bool ISUNICODEWHITESPACE_(uint codepoint)
{
    return md_is_unicode_whitespace__(codepoint);
}

/* Whitespace test for the character starting at offset 'off'. */
bool ISUNICODEWHITESPACE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_whitespace__(codepoint);
}

/* Whitespace test for the character ending right before offset 'off'. */
bool ISUNICODEWHITESPACEBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_whitespace__(codepoint);
}

/* Punctuation test for the character starting at offset 'off'. */
bool ISUNICODEPUNCT(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_punct__(codepoint);
}

/* Punctuation test for the character ending right before offset 'off'. */
bool ISUNICODEPUNCTBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_punct__(codepoint);
}

/* Decode the character at str[off]; 'str_size' is the size of the whole
 * string. The byte length of the character is stored to '*p_char_size'
 * (when not null). */
uint md_decode_unicode(const(CHAR)* str, OFF off, SZ str_size, SZ* p_char_size)
{
    const(CHAR)* cursor = str + off;
    const SZ remaining = str_size - off;
    return md_decode_utf8__(cursor, remaining, p_char_size);
}

/*************************************
 ***  Helper string manipulations  ***
 *************************************/

/* Fill
buffer with a copy of the string between 'beg' and 'end', replacing every
 * line break with the given replacement character.
 *
 * The number of characters written is stored to '*p_size'. The 'n_lines'
 * argument is not consulted here; the walk over 'lines' stops once 'end'
 * has been reached.
 *
 * NOTE: Caller is responsible to make sure the buffer is large enough.
 * (Given the output is always shorter than the input, (end - beg) is a good
 * idea of what the caller should allocate.)
 */
void md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines,
                    CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
{
    CHAR* dst = buffer;
    OFF src = beg;

    for(int idx = 0; ; idx++) {
        /* Copy up to the end of this line, but never past 'end'. */
        OFF stop = lines[idx].end;
        if(end < stop)
            stop = end;

        for(; src < stop; src++, dst++)
            *dst = ctx.CH(src);

        if(src >= end) {
            *p_size = cast(uint)(dst - buffer);
            return;
        }

        /* One replacement character stands for the whole line break. */
        *dst = line_break_replacement_char;
        dst++;

        src = lines[idx + 1].beg;
    }
}

/* Wrapper of md_merge_lines() which allocates new buffer for the output string.
1246 */ 1247 int md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines, 1248 CHAR line_break_replacement_char, const(CHAR)** p_str, SZ* p_size) 1249 { 1250 CHAR* buffer; 1251 1252 buffer = cast(CHAR*) malloc(CHAR.sizeof * (end - beg)); 1253 if(buffer == null) { 1254 ctx.MD_LOG("malloc() failed."); 1255 return -1; 1256 } 1257 1258 md_merge_lines(ctx, beg, end, lines, n_lines, 1259 line_break_replacement_char, buffer, p_size); 1260 1261 *p_str = buffer; 1262 return 0; 1263 } 1264 1265 OFF md_skip_unicode_whitespace(const(CHAR)* label, OFF off, SZ size) 1266 { 1267 SZ char_size; 1268 uint codepoint; 1269 1270 while(off < size) { 1271 codepoint = md_decode_unicode(label, off, size, &char_size); 1272 if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off])) 1273 break; 1274 off += char_size; 1275 } 1276 1277 return off; 1278 } 1279 1280 1281 /****************************** 1282 *** Recognizing raw HTML *** 1283 ******************************/ 1284 1285 /* md_is_html_tag() may be called when processing inlines (inline raw HTML) 1286 * or when breaking document to blocks (checking for start of HTML block type 7). 1287 * 1288 * When breaking document to blocks, we do not yet know line boundaries, but 1289 * in that case the whole tag has to live on a single line. We distinguish this 1290 * by n_lines == 0. 1291 */ 1292 int md_is_html_tag(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1293 { 1294 int attr_state; 1295 OFF off = beg; 1296 OFF line_end = (n_lines > 0) ? lines[0].end : ctx.size; 1297 int i = 0; 1298 1299 assert(ctx.CH(beg) == '<'); 1300 1301 if(off + 1 >= line_end) 1302 return FALSE; 1303 off++; 1304 1305 /* For parsing attributes, we need a little state automaton below. 1306 * State -1: no attributes are allowed. 1307 * State 0: attribute could follow after some whitespace. 1308 * State 1: after a whitespace (attribute name may follow). 
1309 * State 2: after attribute name ('=' MAY follow). 1310 * State 3: after '=' (value specification MUST follow). 1311 * State 41: in middle of unquoted attribute value. 1312 * State 42: in middle of single-quoted attribute value. 1313 * State 43: in middle of double-quoted attribute value. 1314 */ 1315 attr_state = 0; 1316 1317 if(ctx.CH(off) == '/') { 1318 /* Closer tag "</ ... >". No attributes may be present. */ 1319 attr_state = -1; 1320 off++; 1321 } 1322 1323 /* Tag name */ 1324 if(off >= line_end || !ctx.ISALPHA(off)) 1325 return FALSE; 1326 off++; 1327 while(off < line_end && (ctx.ISALNUM(off) || ctx.CH(off) == '-')) 1328 off++; 1329 1330 /* (Optional) attributes (if not closer), (optional) '/' (if not closer) 1331 * and final '>'. */ 1332 while(1) { 1333 while(off < line_end && !ctx.ISNEWLINE(off)) { 1334 if(attr_state > 40) { 1335 if(attr_state == 41 && (ctx.ISBLANK(off) || ctx.ISANYOF(off, "\"'=<>`"))) { 1336 attr_state = 0; 1337 off--; /* Put the char back for re-inspection in the new state. */ 1338 } else if(attr_state == 42 && ctx.CH(off) == '\'') { 1339 attr_state = 0; 1340 } else if(attr_state == 43 && ctx.CH(off) == '"') { 1341 attr_state = 0; 1342 } 1343 off++; 1344 } else if(ctx.ISWHITESPACE(off)) { 1345 if(attr_state == 0) 1346 attr_state = 1; 1347 off++; 1348 } else if(attr_state <= 2 && ctx.CH(off) == '>') { 1349 /* End. 
*/ 1350 goto done; 1351 } else if(attr_state <= 2 && ctx.CH(off) == '/' && off+1 < line_end && ctx.CH(off+1) == '>') { 1352 /* End with digraph '/>' */ 1353 off++; 1354 goto done; 1355 } else if((attr_state == 1 || attr_state == 2) && (ctx.ISALPHA(off) || ctx.CH(off) == '_' || ctx.CH(off) == ':')) { 1356 off++; 1357 /* Attribute name */ 1358 while(off < line_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, "_.:-"))) 1359 off++; 1360 attr_state = 2; 1361 } else if(attr_state == 2 && ctx.CH(off) == '=') { 1362 /* Attribute assignment sign */ 1363 off++; 1364 attr_state = 3; 1365 } else if(attr_state == 3) { 1366 /* Expecting start of attribute value. */ 1367 if(ctx.CH(off) == '"') 1368 attr_state = 43; 1369 else if(ctx.CH(off) == '\'') 1370 attr_state = 42; 1371 else if(!ctx.ISANYOF(off, "\"'=<>`") && !ctx.ISNEWLINE(off)) 1372 attr_state = 41; 1373 else 1374 return FALSE; 1375 off++; 1376 } else { 1377 /* Anything unexpected. */ 1378 return FALSE; 1379 } 1380 } 1381 1382 /* We have to be on a single line. See definition of start condition 1383 * of HTML block, type 7. */ 1384 if(n_lines == 0) 1385 return FALSE; 1386 1387 i++; 1388 if(i >= n_lines) 1389 return FALSE; 1390 1391 off = lines[i].beg; 1392 line_end = lines[i].end; 1393 1394 if(attr_state == 0 || attr_state == 41) 1395 attr_state = 1; 1396 1397 if(off >= max_end) 1398 return FALSE; 1399 } 1400 1401 done: 1402 if(off >= max_end) 1403 return FALSE; 1404 1405 *p_end = off+1; 1406 return TRUE; 1407 } 1408 1409 static int 1410 md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len, 1411 const MD_LINE* lines, int n_lines, 1412 OFF beg, OFF max_end, OFF* p_end, 1413 OFF* p_scan_horizon) 1414 { 1415 OFF off = beg; 1416 int i = 0; 1417 1418 if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) { 1419 /* We have already scanned the range up to the max_end so we know 1420 * there is nothing to see. 
*/ 1421 return FALSE; 1422 } 1423 1424 while(TRUE) { 1425 while(off + len <= lines[i].end && off + len <= max_end) { 1426 if(md_ascii_eq(ctx.STR(off), str, len)) { 1427 /* Success. */ 1428 *p_end = off + len; 1429 return TRUE; 1430 } 1431 off++; 1432 } 1433 1434 i++; 1435 if(off >= max_end || i >= n_lines) { 1436 /* Failure. */ 1437 *p_scan_horizon = off; 1438 return FALSE; 1439 } 1440 1441 off = lines[i].beg; 1442 } 1443 } 1444 1445 static int 1446 md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1447 { 1448 OFF off = beg; 1449 1450 assert(ctx.CH(beg) == '<'); 1451 1452 if(off + 4 >= lines[0].end) 1453 return FALSE; 1454 if(ctx.CH(off+1) != '!' || ctx.CH(off+2) != '-' || ctx.CH(off+3) != '-') 1455 return FALSE; 1456 off += 4; 1457 1458 /* ">" and "." must not follow the opening. */ 1459 if(off < lines[0].end && ctx.CH(off) == '>') 1460 return FALSE; 1461 if(off+1 < lines[0].end && ctx.CH(off) == '-' && ctx.CH(off+1) == '>') 1462 return FALSE; 1463 1464 /* HTML comment must not contain "--", so we scan just for "--" instead 1465 * of "-." and verify manually that '>' follows. 
*/ 1466 if(md_scan_for_html_closer(ctx, "--", 2, 1467 lines, n_lines, off, max_end, p_end, &ctx.html_comment_horizon)) 1468 { 1469 if(*p_end < max_end && ctx.CH(*p_end) == '>') { 1470 *p_end = *p_end + 1; 1471 return TRUE; 1472 } 1473 } 1474 1475 return FALSE; 1476 } 1477 1478 static int 1479 md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1480 { 1481 OFF off = beg; 1482 1483 if(off + 2 >= lines[0].end) 1484 return FALSE; 1485 if(ctx.CH(off+1) != '?') 1486 return FALSE; 1487 off += 2; 1488 1489 return md_scan_for_html_closer(ctx, "?>", 2, 1490 lines, n_lines, off, max_end, p_end, &ctx.html_proc_instr_horizon); 1491 } 1492 1493 static int 1494 md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1495 { 1496 OFF off = beg; 1497 1498 if(off + 2 >= lines[0].end) 1499 return FALSE; 1500 if(ctx.CH(off+1) != '!') 1501 return FALSE; 1502 off += 2; 1503 1504 /* Declaration name. 
*/ 1505 if(off >= lines[0].end || !ctx.ISALPHA(off)) 1506 return FALSE; 1507 off++; 1508 while(off < lines[0].end && ctx.ISALPHA(off)) 1509 off++; 1510 if(off < lines[0].end && !ctx.ISWHITESPACE(off)) 1511 return FALSE; 1512 1513 return md_scan_for_html_closer(ctx, ">", 1, 1514 lines, n_lines, off, max_end, p_end, &ctx.html_decl_horizon); 1515 } 1516 1517 static int 1518 md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1519 { 1520 string open_str = "<![CDATA["; 1521 1522 OFF off = beg; 1523 1524 if(off + open_str.length >= lines[0].end) 1525 return FALSE; 1526 if(memcmp(ctx.STR(off), open_str.ptr, open_str.length) != 0) 1527 return FALSE; 1528 off += open_str.length; 1529 1530 if(lines[n_lines-1].end < max_end) 1531 max_end = lines[n_lines-1].end - 2; 1532 1533 return md_scan_for_html_closer(ctx, "]]>", 3, 1534 lines, n_lines, off, max_end, p_end, &ctx.html_cdata_horizon); 1535 } 1536 1537 static int 1538 md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1539 { 1540 assert(ctx.CH(beg) == '<'); 1541 return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) || 1542 md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) || 1543 md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) || 1544 md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) || 1545 md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end)); 1546 } 1547 1548 1549 /**************************** 1550 *** Recognizing Entity *** 1551 ****************************/ 1552 1553 static int 1554 md_is_hex_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end) 1555 { 1556 OFF off = beg; 1557 1558 while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8) 1559 off++; 1560 1561 if(1 <= off - beg && off - beg <= 6) { 1562 *p_end = off; 1563 return TRUE; 1564 } else { 1565 return FALSE; 1566 } 1567 } 1568 1569 static int 1570 
md_is_dec_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    /* Body of a "&#...;" entity: 1 to 7 decimal digits. On success the offset
     * just past the digits is stored to '*p_end'. */
    OFF off;

    for(off = beg; off < max_end  &&  ISDIGIT_(text[off])  &&  off - beg <= 8; off++)
    {}

    const OFF n_digits = off - beg;
    if(n_digits < 1  ||  n_digits > 7)
        return FALSE;

    *p_end = off;
    return TRUE;
}

/* Body of a named entity: a letter followed by letters or digits, 2 to 48
 * characters in total. On success the offset just past the name is stored
 * to '*p_end'. */
static int
md_is_named_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    /* The name has to start with a letter. */
    if(beg >= max_end  ||  !ISALPHA_(text[beg]))
        return FALSE;

    OFF off;
    for(off = beg + 1; off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48; off++)
    {}

    const OFF name_len = off - beg;
    if(name_len < 2  ||  name_len > 48)
        return FALSE;

    *p_end = off;
    return TRUE;
}

/* Recognize an entity ("&name;", "&#123;" or "&#x1f;") at text[beg], which
 * has to be '&'. On success stores the offset just past the terminating ';'
 * to '*p_end' and returns TRUE. */
static int
md_is_entity_str(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    assert(text[beg] == '&');

    OFF cursor = beg + 1;
    int body_ok;

    /* Each helper moves 'cursor' past the entity body only when it succeeds. */
    if(cursor+2 < max_end  &&  text[cursor] == '#'  &&  (text[cursor+1] == 'x' || text[cursor+1] == 'X'))
        body_ok = md_is_hex_entity_contents(ctx, text, cursor+2, max_end, &cursor);
    else if(cursor+1 < max_end  &&  text[cursor] == '#')
        body_ok = md_is_dec_entity_contents(ctx, text, cursor+1, max_end, &cursor);
    else
        body_ok = md_is_named_entity_contents(ctx, text, cursor, max_end, &cursor);

    if(!body_ok  ||  cursor >= max_end  ||  text[cursor] != ';')
        return FALSE;

    *p_end = cursor+1;
    return TRUE;
}

/* Same as md_is_entity_str(), applied to the document text of 'ctx'. */
static int
md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    const(CHAR)* doc_text = ctx.text;
    return md_is_entity_str(ctx, doc_text, beg, max_end, p_end);
}


/******************************
 ***  Attribute Management  ***
 ******************************/

struct MD_ATTRIBUTE_BUILD
{
    CHAR* text = null;
    MD_TEXTTYPE* substr_types = null;
    OFF* substr_offsets = null;
    int substr_count = 0;
int substr_alloc = 0; 1648 MD_TEXTTYPE[1] trivial_types = [0]; 1649 OFF[2] trivial_offsets = [0, 0]; 1650 } 1651 1652 1653 enum MD_BUILD_ATTR_NO_ESCAPES = 0x0001; 1654 1655 int md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, 1656 MD_TEXTTYPE type, OFF off) 1657 { 1658 if(build.substr_count >= build.substr_alloc) { 1659 MD_TEXTTYPE* new_substr_types; 1660 OFF* new_substr_offsets; 1661 1662 build.substr_alloc = (build.substr_alloc == 0 ? 8 : build.substr_alloc * 2); 1663 1664 new_substr_types = cast(MD_TEXTTYPE*) realloc(build.substr_types, 1665 build.substr_alloc * MD_TEXTTYPE.sizeof); 1666 if(new_substr_types == null) { 1667 ctx.MD_LOG("realloc() failed."); 1668 return -1; 1669 } 1670 /* Note +1 to reserve space for final offset (== raw_size). */ 1671 new_substr_offsets = cast(OFF*) realloc(build.substr_offsets, 1672 (build.substr_alloc+1) * OFF.sizeof); 1673 if(new_substr_offsets == null) { 1674 ctx.MD_LOG("realloc() failed."); 1675 free(new_substr_types); 1676 return -1; 1677 } 1678 1679 build.substr_types = new_substr_types; 1680 build.substr_offsets = new_substr_offsets; 1681 } 1682 1683 build.substr_types[build.substr_count] = type; 1684 build.substr_offsets[build.substr_count] = off; 1685 build.substr_count++; 1686 return 0; 1687 } 1688 1689 void md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build) 1690 { 1691 if(build.substr_alloc > 0) { 1692 free(build.text); 1693 free(build.substr_types); 1694 free(build.substr_offsets); 1695 } 1696 } 1697 1698 int md_build_attribute(MD_CTX* ctx, const(CHAR)* raw_text, SZ raw_size, 1699 uint flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build) 1700 { 1701 OFF raw_off, off; 1702 int is_trivial; 1703 int ret = 0; 1704 1705 memset(build, 0, MD_ATTRIBUTE_BUILD.sizeof); 1706 1707 /* If there is no backslash and no ampersand, build trivial attribute 1708 * without any malloc(). 
*/ 1709 is_trivial = TRUE; 1710 for(raw_off = 0; raw_off < raw_size; raw_off++) { 1711 if(ISANYOF3_(raw_text[raw_off], '\\', '&', '\0')) { 1712 is_trivial = FALSE; 1713 break; 1714 } 1715 } 1716 1717 if(is_trivial) { 1718 build.text = cast(CHAR*) (raw_size ? raw_text : null); 1719 build.substr_types = build.trivial_types.ptr; 1720 build.substr_offsets = build.trivial_offsets.ptr; 1721 build.substr_count = 1; 1722 build.substr_alloc = 0; 1723 build.trivial_types[0] = MD_TEXT_NORMAL; 1724 build.trivial_offsets[0] = 0; 1725 build.trivial_offsets[1] = raw_size; 1726 off = raw_size; 1727 } else { 1728 build.text = cast(CHAR*) malloc(raw_size * CHAR.sizeof); 1729 if(build.text == null) { 1730 ctx.MD_LOG("malloc() failed."); 1731 goto abort; 1732 } 1733 1734 raw_off = 0; 1735 off = 0; 1736 1737 while(raw_off < raw_size) { 1738 if(raw_text[raw_off] == '\0') { 1739 ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off)); 1740 if (ret < 0) goto abort; 1741 memcpy(build.text + off, raw_text + raw_off, 1); 1742 off++; 1743 raw_off++; 1744 continue; 1745 } 1746 1747 if(raw_text[raw_off] == '&') { 1748 OFF ent_end; 1749 1750 if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) { 1751 ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off)); 1752 if (ret < 0) goto abort; 1753 memcpy(build.text + off, raw_text + raw_off, ent_end - raw_off); 1754 off += ent_end - raw_off; 1755 raw_off = ent_end; 1756 continue; 1757 } 1758 } 1759 1760 if(build.substr_count == 0 || build.substr_types[build.substr_count-1] != MD_TEXT_NORMAL) 1761 { 1762 ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off)); 1763 if (ret < 0) goto abort; 1764 } 1765 1766 if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) && 1767 raw_text[raw_off] == '\\' && raw_off+1 < raw_size && 1768 (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1]))) 1769 raw_off++; 1770 1771 build.text[off++] = raw_text[raw_off++]; 1772 } 1773 build.substr_offsets[build.substr_count] = off; 
1774 } 1775 1776 attr.text = build.text; 1777 attr.size = off; 1778 attr.substr_offsets = build.substr_offsets; 1779 attr.substr_types = build.substr_types; 1780 return 0; 1781 1782 abort: 1783 md_free_attribute(ctx, build); 1784 return -1; 1785 } 1786 1787 1788 /********************************************* 1789 *** Dictionary of Reference Definitions *** 1790 *********************************************/ 1791 1792 enum MD_FNV1A_BASE = 2166136261; 1793 enum MD_FNV1A_PRIME = 16777619; 1794 1795 uint md_fnv1a(uint base, const(void)* data, size_t n) 1796 { 1797 const(ubyte)* buf = cast(const(ubyte)*) data; 1798 uint hash = base; 1799 size_t i; 1800 1801 for(i = 0; i < n; i++) { 1802 hash ^= buf[i]; 1803 hash *= MD_FNV1A_PRIME; 1804 } 1805 1806 return hash; 1807 } 1808 1809 1810 struct MD_REF_DEF 1811 { 1812 const(CHAR)* label; 1813 const(CHAR)* title; 1814 uint hash; 1815 SZ label_size; 1816 bool label_needs_free; 1817 bool title_needs_free; 1818 SZ title_size; 1819 OFF dest_beg; 1820 OFF dest_end; 1821 }; 1822 1823 /* Label equivalence is quite complicated with regards to whitespace and case 1824 * folding. This complicates computing a hash of it as well as direct comparison 1825 * of two labels. 
*/

/* Hash a link label in normalized form: leading whitespace is skipped, every
 * run of whitespace and/or new lines is hashed as one ' ' codepoint, and every
 * other codepoint is hashed through its Unicode case-fold expansion.
 *
 * The result is chained through md_fnv1a() starting from MD_FNV1A_BASE.
 */
uint md_link_label_hash(const(CHAR)* label, SZ size)
{
    uint hash = MD_FNV1A_BASE;
    OFF off;
    uint codepoint;
    int is_whitespace = FALSE;

    off = md_skip_unicode_whitespace(label, 0, size);
    while(off < size) {
        SZ char_size;

        codepoint = md_decode_unicode(label, off, size, &char_size);
        is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);

        if(is_whitespace) {
            /* The whole whitespace run contributes a single ' '. */
            codepoint = ' ';
            hash = md_fnv1a(hash, &codepoint, uint.sizeof);
            off = md_skip_unicode_whitespace(label, off, size);
        } else {
            MD_UNICODE_FOLD_INFO fold_info;

            md_get_unicode_fold_info(codepoint, &fold_info);
            hash = md_fnv1a(hash, fold_info.codepoints.ptr, fold_info.n_codepoints * uint.sizeof);
            off += char_size;
        }
    }

    return hash;
}

/* Helper of md_link_label_cmp(): load the fold info of the label character
 * at offset 'off' into *fold_info and return the offset where the next
 * character starts.
 *
 * End of label, a new line and any Unicode whitespace all produce a single
 * ' ' codepoint. A whitespace run is consumed as a whole so that it compares
 * the same way md_link_label_hash() hashes it (one ' ' per run).
 */
OFF md_link_label_cmp_load_fold_info(const(CHAR)* label, OFF off, SZ size,
                                     MD_UNICODE_FOLD_INFO* fold_info)
{
    uint codepoint;
    SZ char_size;

    if(off >= size) {
        /* Treat end of link label as a whitespace. */
        goto whitespace;
    }

    if(ISNEWLINE_(label[off])) {
        /* Treat new lines as a whitespace. */
        off++;
        goto whitespace;
    }

    codepoint = md_decode_unicode(label, off, size, &char_size);
    off += char_size;
    if(ISUNICODEWHITESPACE_(codepoint)) {
        /* Treat all whitespace as equivalent */
        goto whitespace;
    }

    /* Get real folding info. */
    md_get_unicode_fold_info(codepoint, fold_info);
    return off;

whitespace:
    fold_info.codepoints[0] = ' ';
    fold_info.n_codepoints = 1;
    /* Swallow the rest of the whitespace run (nothing to skip at the end). */
    if(off < size)
        off = md_skip_unicode_whitespace(label, off, size);
    return off;
}

/* Compare two link labels, ignoring case (via Unicode case folding), leading
 * whitespace and differences in the kind/amount of inner whitespace.
 *
 * Returns 0 when the labels match, non-zero otherwise (the sign gives an
 * ordering usable by qsort()/bsearch()).
 *
 * The loop keeps going until BOTH labels are fully consumed, including all
 * codepoints of the last character's fold expansion. Stopping as soon as one
 * label reaches its end would make a label compare equal to any longer label
 * it is a prefix of. An exhausted label yields ' ' codepoints (see
 * md_link_label_cmp_load_fold_info()), so it compares unequal to any
 * remaining non-whitespace of the other label.
 */
static int
md_link_label_cmp(const(CHAR)* a_label, SZ a_size, const(CHAR)* b_label, SZ b_size)
{
    OFF a_off;
    OFF b_off;
    MD_UNICODE_FOLD_INFO a_fi = { 0 };
    MD_UNICODE_FOLD_INFO b_fi = { 0 };
    OFF a_fi_off = 0;
    OFF b_fi_off = 0;
    int cmp;

    a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
    b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
    while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
          b_off < b_size || b_fi_off < b_fi.n_codepoints)
    {
        /* If needed, load fold info for next char. */
        if(a_fi_off >= a_fi.n_codepoints) {
            a_fi_off = 0;
            a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
        }
        if(b_fi_off >= b_fi.n_codepoints) {
            b_fi_off = 0;
            b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
        }

        cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
        if(cmp != 0)
            return cmp;

        a_fi_off++;
        b_fi_off++;
    }

    return 0;
}

/* Hash table bucket holding several reference definitions. It is allocated
 * with malloc()/realloc() as a header (the two counters) followed by an
 * array of MD_REF_DEF pointers; 'ref_defs_space' is the first array slot.
 */
struct MD_REF_DEF_LIST
{
nothrow:
@nogc:

    int n_ref_defs;      /* Count of used slots. */
    int alloc_ref_defs;  /* Count of allocated slots. */

    /* Valid items always point into ctx.ref_defs[] */
    MD_REF_DEF* ref_defs_space; // Starting here, a list of pointer at the end of the struct

    // To allocate a MD_REF_DEF_LIST: bytes needed for the header plus
    // numDefRefs pointer slots. The header size is taken from the actual
    // field offset instead of a hard-coded constant.
    static size_t SIZEOF(int numDefRefs)
    {
        return MD_REF_DEF_LIST.ref_defs_space.offsetof + (MD_REF_DEF*).sizeof * numDefRefs;
    }

    // Returns: a slice of ref defs embedded at the end of the struct
    static MD_REF_DEF*[] refDefs(MD_REF_DEF_LIST* list)
    {
        return (&(list.ref_defs_space))[0..list.n_ref_defs];
    }

    // Returns: reference to the index-th pointer slot (no bounds check).
    ref MD_REF_DEF* ref_defs_nth(size_t index)
    {
        MD_REF_DEF** base = &ref_defs_space;
        return base[index];
    }
}

/* qsort()/bsearch() comparator over elements of type MD_REF_DEF*: orders by
 * hash first and by md_link_label_cmp() when the hashes are equal.
 */
extern(C) int md_ref_def_cmp(const(void)* a, const void* b)
{
    const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a;
    const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b;

    if(a_ref.hash < b_ref.hash)
        return -1;
    else if(a_ref.hash > b_ref.hash)
        return +1;
    else
        return md_link_label_cmp(a_ref.label, a_ref.label_size, b_ref.label, b_ref.label_size);
}

/* Same as md_ref_def_cmp() but equal items are further ordered by their
 * address, so that sorting keeps duplicates in their ctx.ref_defs[] order.
 */
extern(C) int md_ref_def_cmp_stable(const(void)* a, const(void)* b)
{
    int cmp;

    cmp = md_ref_def_cmp(a, b);

    /* Ensure stability of the sorting. */
    if(cmp == 0) {
        const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a;
        const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b;

        if(a_ref < b_ref)
            cmp = -1;
        else if(a_ref > b_ref)
            cmp = +1;
        else
            cmp = 0;
    }

    return cmp;
}

/* Build ctx.ref_def_hashtable from ctx.ref_defs[0 .. ctx.n_ref_defs].
 *
 * Returns 0 on success (also when there are no definitions and no table is
 * built) and -1 when an allocation fails. Buckets allocated before a failure
 * stay in the table; md_free_ref_def_hashtable() releases them.
 */
int md_build_ref_def_hashtable(MD_CTX* ctx)
{
    int i, j;

    if(ctx.n_ref_defs == 0)
        return 0;

    ctx.ref_def_hashtable_size = (ctx.n_ref_defs * 5) / 4;
    ctx.ref_def_hashtable = cast(void**) malloc(ctx.ref_def_hashtable_size * (void*).sizeof);
    if(ctx.ref_def_hashtable == null) {
        ctx.MD_LOG("malloc() failed.");
        /* Do not leave a non-zero size next to a null table:
         * md_lookup_ref_def() only checks the size. */
        ctx.ref_def_hashtable_size = 0;
        goto abort;
    }
    memset(ctx.ref_def_hashtable, 0, ctx.ref_def_hashtable_size * (void*).sizeof);

    /* Each member of ctx.ref_def_hashtable[] can be:
     *  -- null,
     *  -- pointer to the MD_REF_DEF in ctx.ref_defs[], or
     *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
     *     such MD_REF_DEFs.
     */
    for(i = 0; i < ctx.n_ref_defs; i++) {
        MD_REF_DEF* def = &ctx.ref_defs[i];
        void* bucket;
        MD_REF_DEF_LIST* list;

        def.hash = md_link_label_hash(def.label, def.label_size);
        bucket = ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size];

        if(bucket == null) {
            ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = def;
            continue;
        }

        /* A bucket pointing inside ctx.ref_defs[] is a single definition;
         * anything else is a MD_REF_DEF_LIST. */
        if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) {
            /* The bucket already contains one ref. def. Lets see whether it
             * is the same label (ref. def. duplicate) or different one
             * (hash conflict). */
            MD_REF_DEF* old_def = cast(MD_REF_DEF*) bucket;

            if(md_link_label_cmp(def.label, def.label_size, old_def.label, old_def.label_size) == 0) {
                /* Ignore this ref. def. */
                continue;
            }

            /* Make the bucket capable of holding more ref. defs. */
            list = cast(MD_REF_DEF_LIST*) malloc(MD_REF_DEF_LIST.SIZEOF(4));
            if(list == null) {
                ctx.MD_LOG("malloc() failed.");
                goto abort;
            }
            list.ref_defs_nth(0) = old_def;
            list.ref_defs_nth(1) = def;
            list.n_ref_defs = 2;
            list.alloc_ref_defs = 4;
            ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = list;
            continue;
        }

        /* Append the def to the bucket list. */
        list = cast(MD_REF_DEF_LIST*) bucket;
        if(list.n_ref_defs >= list.alloc_ref_defs) {
            MD_REF_DEF_LIST* list_tmp = cast(MD_REF_DEF_LIST*) realloc(list, MD_REF_DEF_LIST.SIZEOF( 2 * list.alloc_ref_defs ));
            if(list_tmp == null) {
                ctx.MD_LOG("realloc() failed.");
                goto abort;
            }
            list = list_tmp;
            list.alloc_ref_defs *= 2;
            ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = list;
        }

        list.ref_defs_nth(list.n_ref_defs) = def;
        list.n_ref_defs++;
    }

    /* Sort the complex buckets so we can use bsearch() with them. */
    for(i = 0; i < ctx.ref_def_hashtable_size; i++) {
        void* bucket = ctx.ref_def_hashtable[i];
        MD_REF_DEF_LIST* list;

        if(bucket == null)
            continue;
        if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs)
            continue;

        list = cast(MD_REF_DEF_LIST*) bucket;
        qsort(MD_REF_DEF_LIST.refDefs(list).ptr, list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp_stable);

        /* Disable duplicates. */
        for(j = 1; j < list.n_ref_defs; j++) {
            if(md_ref_def_cmp(&list.ref_defs_nth(j-1), &list.ref_defs_nth(j)) == 0)
                list.ref_defs_nth(j) = list.ref_defs_nth(j-1);
        }
    }

    return 0;

abort:
    return -1;
}

/* Free the hash table and every MD_REF_DEF_LIST bucket in it. Buckets that
 * point into ctx.ref_defs[] are not owned by the table and are left alone.
 */
static void
md_free_ref_def_hashtable(MD_CTX* ctx)
{
    if(ctx.ref_def_hashtable != null) {
        int i;

        for(i = 0; i < ctx.ref_def_hashtable_size; i++) {
            void* bucket = ctx.ref_def_hashtable[i];
            if(bucket == null)
                continue;
            if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs)
                continue;
            free(bucket);
        }

        free(ctx.ref_def_hashtable);
    }
}

/* Find the reference definition matching the given label.
 *
 * Returns pointer to the definition, or null when there is no hash table or
 * no definition with a matching label.
 */
const(MD_REF_DEF)* md_lookup_ref_def(MD_CTX* ctx, const(CHAR)* label, SZ label_size)
{
    uint hash;
    void* bucket;

    if(ctx.ref_def_hashtable_size == 0)
        return null;

    hash = md_link_label_hash(label, label_size);
    bucket = ctx.ref_def_hashtable[hash % ctx.ref_def_hashtable_size];

    if(bucket == null) {
        return null;
    } else if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) {
        /* Simple bucket with a single definition. Different hashes can share
         * the bucket, so the label itself still has to be compared. */
        const MD_REF_DEF* def = cast(MD_REF_DEF*) bucket;

        if(md_link_label_cmp(def.label, def.label_size, label, label_size) == 0)
            return def;
        else
            return null;
    } else {
        /* Sorted list of definitions: binary search it with a temporary
         * key definition carrying just the label and its hash. */
        MD_REF_DEF_LIST* list = cast(MD_REF_DEF_LIST*) bucket;
        MD_REF_DEF key_buf;
        const MD_REF_DEF* key = &key_buf;
        const(MD_REF_DEF*)* ret;

        key_buf.label = cast(CHAR*) label;
        key_buf.label_size = label_size;
        key_buf.hash = hash;    /* Already computed above for the same label. */

        ret = cast(const(MD_REF_DEF*)*) bsearch(&key, MD_REF_DEF_LIST.refDefs(list).ptr,
                    list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp);
        if(ret != null)
            return *ret;
        else
            return null;
    }
}


/***************************
 ***  Recognizing Links  ***
 ***************************/

/* Note this code is partially shared between processing inlines and blocks
 * as reference definitions and links share some helper parser functions.
 */

/* Destination and title of a recognized link. */
struct MD_LINK_ATTR
{
    /* Offsets of the link destination. */
    OFF dest_beg;
    OFF dest_end;

    /* Link title (null when there is none) and its size. */
    const(CHAR)* title;
    SZ title_size;
    /* True when 'title' was allocated and has to be released with free(). */
    bool title_needs_free;
}


/* Check whether a link label ("[...]") starts at offset 'beg', which has to
 * be on lines[0].
 *
 * On success returns TRUE and stores: offset just past the closing ']' into
 * *p_end, indexes of the lines where the label contents starts and where the
 * label ends into *p_beg_line_index and *p_end_line_index, and the range of
 * the contents with surrounding whitespace stripped into *p_contents_beg and
 * *p_contents_end.
 *
 * Returns FALSE when there is no '[' at 'beg', an unescaped '[' is found
 * inside, the label has no non-whitespace contents, no ']' is found within
 * the given lines, or the scan gets longer than 999 steps (each character,
 * escape pair and line break counts as one). Note *p_beg_line_index may be
 * written even when FALSE is returned.
 */
static int
md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
                 OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
                 OFF* p_contents_beg, OFF* p_contents_end)
{
    OFF off = beg;
    OFF contents_beg = 0;
    OFF contents_end = 0;
    int line_index = 0;
    int len = 0;

    if(ctx.CH(off) != '[')
        return FALSE;
    off++;

    while(1) {
        OFF line_end = lines[line_index].end;

        while(off < line_end) {
            if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) {
                /* Escape sequence; it counts as label contents. */
                if(contents_end == 0) {
                    contents_beg = off;
                    *p_beg_line_index = line_index;
                }
                contents_end = off + 2;
                off += 2;
            } else if(ctx.CH(off) == '[') {
                return FALSE;
            } else if(ctx.CH(off) == ']') {
                if(contents_beg < contents_end) {
                    /* Success. */
                    *p_contents_beg = contents_beg;
                    *p_contents_end = contents_end;
                    *p_end = off+1;
                    *p_end_line_index = line_index;
                    return TRUE;
                } else {
                    /* Link label must have some non-whitespace contents. */
                    return FALSE;
                }
            } else {
                uint codepoint;
                SZ char_size;

                codepoint = md_decode_unicode(ctx.text, off, ctx.size, &char_size);
                if(!ISUNICODEWHITESPACE_(codepoint)) {
                    if(contents_end == 0) {
                        contents_beg = off;
                        *p_beg_line_index = line_index;
                    }
                    contents_end = off + char_size;
                }

                off += char_size;
            }

            len++;
            if(len > 999)
                return FALSE;
        }

        line_index++;
        len++;
        if(line_index < n_lines)
            off = lines[line_index].beg;
        else
            break;
    }

    return FALSE;
}

/* Check for a link destination in the "<...>" form starting at 'beg' and
 * ending before 'max_end'. The destination may not contain a new line or an
 * unescaped '<'.
 *
 * On success returns TRUE, stores the offset past '>' into *p_end and the
 * range between the angle brackets into *p_contents_beg/*p_contents_end.
 */
static int
md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
                         OFF* p_contents_beg, OFF* p_contents_end)
{
    OFF off = beg;

    if(off >= max_end || ctx.CH(off) != '<')
        return FALSE;
    off++;

    while(off < max_end) {
        if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) {
            off += 2;
            continue;
        }

        if(ctx.ISNEWLINE(off) || ctx.CH(off) == '<')
            return FALSE;

        if(ctx.CH(off) == '>') {
            /* Success. */
            *p_contents_beg = beg+1;
            *p_contents_end = off;
            *p_end = off+1;
            return TRUE;
        }

        off++;
    }

    return FALSE;
}

/* Check for a link destination in the bare form (no angle brackets) starting
 * at 'beg'. The destination ends at the first whitespace or control
 * character, at an unbalanced ')' or at 'max_end'.
 *
 * On success returns TRUE and stores the destination range into
 * *p_contents_beg/*p_contents_end; *p_end equals *p_contents_end. Returns
 * FALSE for an empty destination, for unbalanced '(' and when parentheses
 * nest deeper than 32 levels.
 */
static int
md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
                         OFF* p_contents_beg, OFF* p_contents_end)
{
    OFF off = beg;
    int parenthesis_level = 0;

    while(off < max_end) {
        if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) {
            off += 2;
            continue;
        }

        if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off))
            break;

        /* Link destination may include balanced pairs of unescaped '(' ')'.
         * Note we limit the maximal nesting level by 32 to protect us from
         * https://github.com/jgm/cmark/issues/214 */
        if(ctx.CH(off) == '(') {
            parenthesis_level++;
            if(parenthesis_level > 32)
                return FALSE;
        } else if(ctx.CH(off) == ')') {
            if(parenthesis_level == 0)
                break;
            parenthesis_level--;
        }

        off++;
    }

    if(parenthesis_level != 0 || off == beg)
        return FALSE;

    /* Success. */
    *p_contents_beg = beg;
    *p_contents_end = off;
    *p_end = off;
    return TRUE;
}

/* Check for a link destination of either form in the range 'beg' ..
 * 'max_end'; see md_is_link_destination_A() and md_is_link_destination_B()
 * for the outputs.
 */
static int
md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
                       OFF* p_contents_beg, OFF* p_contents_end)
{
    /* An empty range cannot hold a destination (both forms reject it).
     * Bail out before peeking at the character at 'beg', which may lie
     * past the end of the input. */
    if(beg >= max_end)
        return FALSE;

    if(ctx.CH(beg) == '<')
        return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
    else
        return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
}

/* Check for a link title. The title has to be separated from 'beg' (which is
 * on lines[0]) by some whitespace and/or one line break, and it is delimited
 * by "...", '...' or (...).
 *
 * On success returns TRUE and stores: offset past the closing delimiter into
 * *p_end, index of the line where the title starts into *p_beg_line_index,
 * index of the line where it ends into *p_end_line_index, and the range
 * between the delimiters into *p_contents_beg/*p_contents_end. Note
 * *p_beg_line_index and *p_contents_beg may be written even when FALSE is
 * returned.
 */
static int
md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
                 OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
                 OFF* p_contents_beg, OFF* p_contents_end)
{
    OFF off = beg;
    CHAR closer_char;
    int line_index = 0;

    /* White space with up to one line break. */
    while(off < lines[line_index].end && ctx.ISWHITESPACE(off))
        off++;
    if(off >= lines[line_index].end) {
        line_index++;
        if(line_index >= n_lines)
            return FALSE;
        off = lines[line_index].beg;
    }
    if(off == beg)
        return FALSE;

    *p_beg_line_index = line_index;

    /* First char determines how to detect end of it. */
    switch(ctx.CH(off)) {
        case '"':   closer_char = '"'; break;
        case '\'':  closer_char = '\''; break;
        case '(':   closer_char = ')'; break;
        default:    return FALSE;
    }
    off++;

    *p_contents_beg = off;

    while(line_index < n_lines) {
        OFF line_end = lines[line_index].end;

        while(off < line_end) {
            if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) {
                /* Skip the backslash; the escaped char is skipped below. */
                off++;
            } else if(ctx.CH(off) == closer_char) {
                /* Success. */
                *p_contents_end = off;
                *p_end = off+1;
                *p_end_line_index = line_index;
                return TRUE;
            } else if(closer_char == ')' && ctx.CH(off) == '(') {
                /* ()-style title cannot contain (unescaped '(')) */
                return FALSE;
            }

            off++;
        }

        line_index++;
    }

    return FALSE;
}

/* Returns 0 if it is not a reference definition.
 *
 * Returns N > 0 if it is a reference definition. N then corresponds to the
 * number of lines forming it). In this case the definition is stored for
 * resolving any links referring to it.
 *
 * Returns -1 in case of an error (out of memory).
2399 */ 2400 int md_is_link_reference_definition(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines) 2401 { 2402 OFF label_contents_beg; 2403 OFF label_contents_end; 2404 int label_contents_line_index = -1; 2405 int label_is_multiline; 2406 const(CHAR)* label; 2407 SZ label_size; 2408 bool label_needs_free = false; 2409 OFF dest_contents_beg; 2410 OFF dest_contents_end; 2411 OFF title_contents_beg; 2412 OFF title_contents_end; 2413 int title_contents_line_index; 2414 int title_is_multiline; 2415 OFF off; 2416 int line_index = 0; 2417 int tmp_line_index; 2418 MD_REF_DEF* def; 2419 int ret; 2420 2421 /* Link label. */ 2422 if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, 2423 &off, &label_contents_line_index, &line_index, 2424 &label_contents_beg, &label_contents_end)) 2425 return FALSE; 2426 label_is_multiline = (label_contents_line_index != line_index); 2427 2428 /* Colon. */ 2429 if(off >= lines[line_index].end || ctx.CH(off) != ':') 2430 return FALSE; 2431 off++; 2432 2433 /* Optional white space with up to one line break. */ 2434 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2435 off++; 2436 if(off >= lines[line_index].end) { 2437 line_index++; 2438 if(line_index >= n_lines) 2439 return FALSE; 2440 off = lines[line_index].beg; 2441 } 2442 2443 /* Link destination. */ 2444 if(!md_is_link_destination(ctx, off, lines[line_index].end, 2445 &off, &dest_contents_beg, &dest_contents_end)) 2446 return FALSE; 2447 2448 /* (Optional) title. Note we interpret it as an title only if nothing 2449 * more follows on its last line. */ 2450 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, 2451 &off, &title_contents_line_index, &tmp_line_index, 2452 &title_contents_beg, &title_contents_end) 2453 && off >= lines[line_index + tmp_line_index].end) 2454 { 2455 title_is_multiline = (tmp_line_index != title_contents_line_index); 2456 title_contents_line_index += line_index; 2457 line_index += tmp_line_index; 2458 } else { 2459 /* Not a title. 
*/ 2460 title_is_multiline = FALSE; 2461 title_contents_beg = off; 2462 title_contents_end = off; 2463 title_contents_line_index = 0; 2464 } 2465 2466 /* Nothing more can follow on the last line. */ 2467 if(off < lines[line_index].end) 2468 return FALSE; 2469 2470 /* Construct label. */ 2471 if(!label_is_multiline) { 2472 label = cast(CHAR*) ctx.STR(label_contents_beg); 2473 label_size = label_contents_end - label_contents_beg; 2474 label_needs_free = false; 2475 } else { 2476 ret = (md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, 2477 lines + label_contents_line_index, n_lines - label_contents_line_index, 2478 ' ', &label, &label_size)); 2479 if (ret < 0) goto abort; 2480 label_needs_free = true; 2481 } 2482 2483 /* Store the reference definition. */ 2484 if(ctx.n_ref_defs >= ctx.alloc_ref_defs) { 2485 MD_REF_DEF* new_defs; 2486 2487 ctx.alloc_ref_defs = (ctx.alloc_ref_defs > 0 ? ctx.alloc_ref_defs * 2 : 16); 2488 new_defs = cast(MD_REF_DEF*) realloc(ctx.ref_defs, ctx.alloc_ref_defs * MD_REF_DEF.sizeof); 2489 if(new_defs == null) { 2490 ctx.MD_LOG("realloc() failed."); 2491 ret = -1; 2492 goto abort; 2493 } 2494 2495 ctx.ref_defs = new_defs; 2496 } 2497 2498 def = &ctx.ref_defs[ctx.n_ref_defs]; 2499 memset(def, 0, MD_REF_DEF.sizeof); 2500 2501 def.label = label; 2502 def.label_size = label_size; 2503 def.label_needs_free = label_needs_free; 2504 2505 def.dest_beg = dest_contents_beg; 2506 def.dest_end = dest_contents_end; 2507 2508 if(title_contents_beg >= title_contents_end) { 2509 def.title = null; 2510 def.title_size = 0; 2511 } else if(!title_is_multiline) { 2512 def.title = cast(CHAR*) ctx.STR(title_contents_beg); 2513 def.title_size = title_contents_end - title_contents_beg; 2514 } else { 2515 ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, 2516 lines + title_contents_line_index, n_lines - title_contents_line_index, 2517 '\n', &def.title, &def.title_size)); 2518 if (ret < 0) goto abort; 2519 def.title_needs_free = 
true; 2520 } 2521 2522 /* Success. */ 2523 ctx.n_ref_defs++; 2524 return line_index + 1; 2525 2526 abort: 2527 /* Failure. */ 2528 if(label_needs_free) 2529 free(cast(void*)label); // Note: const_cast here 2530 return -1; 2531 } 2532 2533 static int 2534 md_is_link_reference(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, 2535 OFF beg, OFF end, MD_LINK_ATTR* attr) 2536 { 2537 const(MD_REF_DEF)* def; 2538 const(MD_LINE)* beg_line; 2539 const(MD_LINE)* end_line; 2540 const(CHAR)* label; 2541 SZ label_size; 2542 int ret; 2543 2544 assert(ctx.CH(beg) == '[' || ctx.CH(beg) == '!'); 2545 assert(ctx.CH(end-1) == ']'); 2546 2547 beg += (ctx.CH(beg) == '!' ? 2 : 1); 2548 end--; 2549 2550 /* Find lines corresponding to the beg and end positions. */ 2551 assert(lines[0].beg <= beg); 2552 beg_line = lines; 2553 while(beg >= beg_line.end) 2554 beg_line++; 2555 2556 assert(end <= lines[n_lines-1].end); 2557 end_line = beg_line; 2558 while(end >= end_line.end) 2559 end_line++; 2560 2561 if(beg_line != end_line) { 2562 ret = (md_merge_lines_alloc(ctx, beg, end, beg_line, 2563 cast(int)(n_lines - (beg_line - lines)), ' ', &label, &label_size)); 2564 if (ret < 0) goto abort; 2565 } else { 2566 label = cast(CHAR*) ctx.STR(beg); 2567 label_size = end - beg; 2568 } 2569 2570 def = md_lookup_ref_def(ctx, label, label_size); 2571 if(def != null) { 2572 attr.dest_beg = def.dest_beg; 2573 attr.dest_end = def.dest_end; 2574 attr.title = def.title; 2575 attr.title_size = def.title_size; 2576 attr.title_needs_free = false; 2577 } 2578 2579 if(beg_line != end_line) 2580 free(cast(void*)label); // Note: const_cast here 2581 2582 ret = (def != null); 2583 2584 abort: 2585 return ret; 2586 } 2587 2588 static int 2589 md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, 2590 OFF beg, OFF* p_end, MD_LINK_ATTR* attr) 2591 { 2592 int line_index = 0; 2593 int tmp_line_index; 2594 OFF title_contents_beg; 2595 OFF title_contents_end; 2596 int title_contents_line_index; 2597 int 
title_is_multiline; 2598 OFF off = beg; 2599 int ret = FALSE; 2600 2601 while(off >= lines[line_index].end) 2602 line_index++; 2603 2604 assert(ctx.CH(off) == '('); 2605 off++; 2606 2607 /* Optional white space with up to one line break. */ 2608 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2609 off++; 2610 if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { 2611 line_index++; 2612 if(line_index >= n_lines) 2613 return FALSE; 2614 off = lines[line_index].beg; 2615 } 2616 2617 /* Link destination may be omitted, but only when not also having a title. */ 2618 if(off < ctx.size && ctx.CH(off) == ')') { 2619 attr.dest_beg = off; 2620 attr.dest_end = off; 2621 attr.title = null; 2622 attr.title_size = 0; 2623 attr.title_needs_free = false; 2624 off++; 2625 *p_end = off; 2626 return TRUE; 2627 } 2628 2629 /* Link destination. */ 2630 if(!md_is_link_destination(ctx, off, lines[line_index].end, 2631 &off, &attr.dest_beg, &attr.dest_end)) 2632 return FALSE; 2633 2634 /* (Optional) title. */ 2635 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, 2636 &off, &title_contents_line_index, &tmp_line_index, 2637 &title_contents_beg, &title_contents_end)) 2638 { 2639 title_is_multiline = (tmp_line_index != title_contents_line_index); 2640 title_contents_line_index += line_index; 2641 line_index += tmp_line_index; 2642 } else { 2643 /* Not a title. */ 2644 title_is_multiline = FALSE; 2645 title_contents_beg = off; 2646 title_contents_end = off; 2647 title_contents_line_index = 0; 2648 } 2649 2650 /* Optional whitespace followed with final ')'. 
*/ 2651 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2652 off++; 2653 if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { 2654 line_index++; 2655 if(line_index >= n_lines) 2656 return FALSE; 2657 off = lines[line_index].beg; 2658 } 2659 if(ctx.CH(off) != ')') 2660 goto abort; 2661 off++; 2662 2663 if(title_contents_beg >= title_contents_end) { 2664 attr.title = null; 2665 attr.title_size = 0; 2666 attr.title_needs_free = false; 2667 } else if(!title_is_multiline) { 2668 attr.title = cast(CHAR*) ctx.STR(title_contents_beg); // Note: const_cast here! 2669 attr.title_size = title_contents_end - title_contents_beg; 2670 attr.title_needs_free = false; 2671 } else { 2672 ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, 2673 lines + title_contents_line_index, n_lines - title_contents_line_index, 2674 '\n', &attr.title, &attr.title_size)); 2675 if (ret < 0) goto abort; 2676 attr.title_needs_free = true; 2677 } 2678 2679 *p_end = off; 2680 ret = TRUE; 2681 2682 abort: 2683 return ret; 2684 } 2685 2686 void md_free_ref_defs(MD_CTX* ctx) 2687 { 2688 int i; 2689 2690 for(i = 0; i < ctx.n_ref_defs; i++) { 2691 MD_REF_DEF* def = &ctx.ref_defs[i]; 2692 2693 if(def.label_needs_free) 2694 free(cast(void*)def.label); // Note: const_cast here 2695 if(def.title_needs_free) 2696 free(cast(void*)def.title); // Note: const_cast here 2697 } 2698 2699 free(ctx.ref_defs); 2700 } 2701 2702 2703 /****************************************** 2704 *** Processing Inlines (a.k.a Spans) *** 2705 ******************************************/ 2706 2707 /* We process inlines in few phases: 2708 * 2709 * (1) We go through the block text and collect all significant characters 2710 * which may start/end a span or some other significant position into 2711 * ctx.marks[]. Core of this is what md_collect_marks() does. 2712 * 2713 * We also do some very brief preliminary context-less analysis, whether 2714 * it might be opener or closer (e.g. of an emphasis span). 
 *
 *     This speeds up the other steps as we do not need to re-iterate over all
 *     characters anymore.
 *
 * (2) We analyze each potential mark type, in order of precedence.
 *
 *     In each md_analyze_XXX() function, we re-iterate over the list of marks,
 *     skipping already resolved regions (in preceding precedences) and try to
 *     resolve them.
 *
 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
 *       them as resolved.
 *
 * (2.2) For range-type marks, we analyze whether the mark could be a closer
 *       and, if yes, whether there is some preceding opener it could satisfy.
 *
 *       If not, we check whether it could really be an opener and if yes, we
 *       remember it so subsequent closers may resolve it.
 *
 * (3) Finally, when all marks were analyzed, we render the block contents
 *     by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
 *     or ::close_span() whenever we reach a resolved mark.
 */


/* The mark structure.
 *
 * '\\': Maybe escape sequence.
 * '\0': null char.
 *  '*': Maybe (strong) emphasis start/end.
 *  '_': Maybe (strong) emphasis start/end.
 *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
 *  '$': Maybe LaTeX math span start/end (needs MD_FLAG_LATEXMATHSPANS).
 *  '`': Maybe code span start/end.
 *  '&': Maybe start of entity.
 *  ';': Maybe end of entity.
 *  '<': Maybe start of raw HTML or autolink.
 *  '>': Maybe end of raw HTML or autolink.
 *  '[': Maybe start of link label or link text.
 *  '!': Equivalent of '[' for image.
 *  ']': Maybe end of link label or link text.
 *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
 *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
 *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
 *  '|': Maybe table cell boundary (needs MD_FLAG_TABLES).
 *  'D': Dummy mark, it reserves a space for splitting a previous mark
 *       (e.g.
emphasis) or to make more space for storing some special data
 *       related to the preceding mark (e.g. link).
 *
 * Note that not all instances of these chars in the text imply creation of the
 * structure. Only those which have (or may have, after we see more context)
 * the special meaning.
 *
 * (Keep this struct as small as possible to fit as much of them into CPU
 * cache line.)
 */

struct MD_MARK {
    /* Offsets of the mark's beginning and end. For dummy ('D') marks, the
     * leading bytes of the struct may instead hold a raw pointer, see
     * md_mark_store_ptr(). */
    OFF beg;
    OFF end;

    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
     * of given type 'ch'.
     *
     * During resolving, we disconnect from the chain and point to the
     * corresponding counterpart so opener points to its closer and vice versa.
     *
     * Both are indexes into ctx.marks[]; -1 means "none".
     */
    int prev;
    int next;
    /* The mark character, i.e. the kind of the mark. */
    CHAR ch;
    /* Combination of the MD_MARK_xxxx flags below. */
    ubyte flags;
};

/* Mark flags (these apply to ALL mark types). */
enum MD_MARK_POTENTIAL_OPENER = 0x01;  /* Maybe opener. */
enum MD_MARK_POTENTIAL_CLOSER = 0x02;  /* Maybe closer. */
enum MD_MARK_OPENER = 0x04;  /* Definitely opener. */
enum MD_MARK_CLOSER = 0x08;  /* Definitely closer. */
enum MD_MARK_RESOLVED = 0x10;  /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits). */
enum MD_MARK_EMPH_INTRAWORD = 0x20;  /* Helper for the "rule of 3". */
enum MD_MARK_EMPH_MOD3_0 = 0x40;
enum MD_MARK_EMPH_MOD3_1 = 0x80;
enum MD_MARK_EMPH_MOD3_2 = (0x40 | 0x80);
enum MD_MARK_EMPH_MOD3_MASK = (0x40 | 0x80);
enum MD_MARK_AUTOLINK = 0x20;  /* Distinguisher for '<', '>'. */
enum MD_MARK_VALIDPERMISSIVEAUTOLINK = 0x20;  /* For permissive autolinks.
*/

/* Select the chain of unresolved '*' openers matching the given mark flags.
 * There is a separate chain for each combination of the intraword flag and
 * the MD_MARK_EMPH_MOD3_x value; flags without any MOD3 bit are a bug.
 */
MD_MARKCHAIN* md_asterisk_chain(MD_CTX* ctx, uint flags)
{
    switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK))
    {
        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0:  return ctx.ASTERISK_OPENERS_intraword_mod3_0;
        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1:  return ctx.ASTERISK_OPENERS_intraword_mod3_1;
        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2:  return ctx.ASTERISK_OPENERS_intraword_mod3_2;
        case MD_MARK_EMPH_MOD3_0:                           return ctx.ASTERISK_OPENERS_extraword_mod3_0;
        case MD_MARK_EMPH_MOD3_1:                           return ctx.ASTERISK_OPENERS_extraword_mod3_1;
        case MD_MARK_EMPH_MOD3_2:                           return ctx.ASTERISK_OPENERS_extraword_mod3_2;
        default:                                            assert(false);
    }
}

/* Get the opener chain for the mark ctx.marks[mark_index], selected by the
 * mark's character. Returns null for marks which do not use any chain.
 */
MD_MARKCHAIN* md_mark_chain(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];

    switch(mark.ch) {
        case '*':   return md_asterisk_chain(ctx, mark.flags);
        case '_':   return ctx.UNDERSCORE_OPENERS;
        case '~':   return ctx.TILDE_OPENERS;
        case '[':   return ctx.BRACKET_OPENERS;
        case '|':   return ctx.TABLECELLBOUNDARIES;
        default:    return null;
    }
}

/* Reserve a new (uninitialized) mark at the end of ctx.marks[], growing the
 * array when it is full (to 64 marks at first, doubling afterwards).
 *
 * Returns pointer to the new mark, or null when the array cannot be grown.
 * On failure ctx.marks, ctx.n_marks and ctx.alloc_marks are left unchanged.
 */
MD_MARK* md_push_mark(MD_CTX* ctx)
{
    if(ctx.n_marks >= ctx.alloc_marks) {
        MD_MARK* new_marks;
        /* Commit the new capacity only after realloc() succeeds, so that
         * ctx.alloc_marks never claims more than is really allocated. */
        auto new_alloc = (ctx.alloc_marks > 0 ? ctx.alloc_marks * 2 : 64);

        new_marks = cast(MD_MARK*) realloc(ctx.marks, new_alloc * MD_MARK.sizeof);
        if(new_marks == null) {
            ctx.MD_LOG("realloc() failed.");
            return null;
        }

        ctx.marks = new_marks;
        ctx.alloc_marks = new_alloc;
    }

    return &ctx.marks[ctx.n_marks++];
}

/* Reserve a new mark and store pointer to it into *mark.
 * Returns 0 on success, -1 on allocation failure (*mark is then null).
 */
int PUSH_MARK_(MD_CTX* ctx, MD_MARK** mark)
{
    *mark = md_push_mark(ctx);
    if(*mark == null)
    {
        return -1;
    }
    return 0;
}

/* Reserve a new mark, initialize it with the given character, range and
 * flags (not linked to any other mark) and store pointer to it into *mark.
 * Returns 0 on success, -1 on allocation failure.
 */
int PUSH_MARK(MD_CTX* ctx, MD_MARK** mark, CHAR ch_, OFF beg_, OFF end_, int flags_)
{
    int ret = PUSH_MARK_(ctx, mark);
    if (ret != 0)
        return ret;

    (*mark).beg = (beg_);
    (*mark).end = (end_);
    (*mark).prev = -1;
    (*mark).next = -1;
    (*mark).ch = cast(char)(ch_);
    (*mark).flags = cast(ubyte)flags_;
    return 0;
}

/* Append the mark ctx.marks[mark_index] at the tail of the given chain.
 * Only the mark's 'prev' is set here; its 'next' is left as it is.
 */
static void
md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
{
    if(chain.tail >= 0)
        ctx.marks[chain.tail].next = mark_index;
    else
        chain.head = mark_index;

    ctx.marks[mark_index].prev = chain.tail;
    chain.tail = mark_index;
}

/* Sometimes, we need to store a pointer into the mark. It is quite rare
 * so we do not bother to make MD_MARK use union, and it can only happen
 * for dummy marks. */
void md_mark_store_ptr(MD_CTX* ctx, int mark_index, const(void)* ptr)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    assert(mark.ch == 'D');

    /* Check only members beg and end are misused for this. (Verified at
     * compile time so the check also holds in release builds.) */
    static assert((void*).sizeof <= 2 * OFF.sizeof);
    memcpy(mark, &ptr, (void*).sizeof);
}

/* Read back the pointer stored into a dummy mark by md_mark_store_ptr(). */
static void*
md_mark_get_ptr(MD_CTX* ctx, int mark_index)
{
    void* ptr;
    MD_MARK* mark = &ctx.marks[mark_index];
    assert(mark.ch == 'D');
    memcpy(&ptr, mark, (void*).sizeof);
    return ptr;
}

/* Resolve the opener and closer marks as a pair. When 'chain' is not null,
 * the opener is first unlinked from that chain of unresolved openers.
 */
static void
md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
{
    MD_MARK* opener = &ctx.marks[opener_index];
    MD_MARK* closer = &ctx.marks[closer_index];

    /* Remove opener from the list of openers. */
    if(chain != null) {
        if(opener.prev >= 0)
            ctx.marks[opener.prev].next = opener.next;
        else
            chain.head = opener.next;

        if(opener.next >= 0)
            ctx.marks[opener.next].prev = opener.prev;
        else
            chain.tail = opener.prev;
    }

    /* Interconnect opener and closer and mark both as resolved. */
    opener.next = closer_index;
    opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
    closer.prev = opener_index;
    closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
}


/* Values of the parameter 'how' of md_rollback(). */
enum MD_ROLLBACK_ALL = 0;
enum MD_ROLLBACK_CROSSING = 1;

/* In the range ctx.marks[opener_index] ... [closer_index], undo some or all
 * resolvings accordingly to these rules:
 *
 * (1) All openers BEFORE the range corresponding to any closer inside the
 *     range are un-resolved and they are re-added to their respective chains
 *     of unresolved openers. This ensures we can reuse the opener for closers
 *     AFTER the range.
 *
 * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
 *     are discarded.
 *
 * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
 *     in (1) are discarded. I.e. pairs of openers and closers which are both
 *     inside the range are retained as well as any unpaired marks.
 */
static void
md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
{
    int i;
    int mark_index;

    /* Cut all unresolved openers at the mark index. */
    for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
        MD_MARKCHAIN* chain = &ctx.mark_chains[i];

        /* Drop every chained opener located at or after opener_index. */
        while(chain.tail >= opener_index)
            chain.tail = ctx.marks[chain.tail].prev;

        if(chain.tail >= 0)
            ctx.marks[chain.tail].next = -1;
        else
            chain.head = -1;
    }

    /* Go backwards so that un-resolved openers are re-added into their
     * respective chains, in the right order. */
    mark_index = closer_index - 1;
    while(mark_index > opener_index) {
        MD_MARK* mark = &ctx.marks[mark_index];
        /* Snapshot of the flags; the mark's flags may get reset below. */
        int mark_flags = mark.flags;
        int discard_flag = (how == MD_ROLLBACK_ALL);

        if(mark.flags & MD_MARK_CLOSER) {
            int mark_opener_index = mark.prev;

            /* Undo opener BEFORE the range. */
            if(mark_opener_index < opener_index) {
                MD_MARK* mark_opener = &ctx.marks[mark_opener_index];
                MD_MARKCHAIN* chain;

                mark_opener.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
                /* NOTE(review): the chain is looked up for opener_index (the
                 * opener of the rolled-back range), not for mark_opener_index
                 * (the opener being re-added). Confirm this is intended. */
                chain = md_mark_chain(ctx, opener_index);
                if(chain != null) {
                    md_mark_chain_append(ctx, chain, mark_opener_index);
                    discard_flag = 1;
                }
            }
        }

        /* And reset our flags. */
        if(discard_flag)
            mark.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);

        /* Jump as far as we can over unresolved or non-interesting marks. */
        switch(how) {
            case MD_ROLLBACK_CROSSING:
                if((mark_flags & MD_MARK_CLOSER) && mark.prev > opener_index) {
                    /* If we are closer with opener INSIDE the range, there may
                     * not be any other crosser inside the subrange. */
                    mark_index = mark.prev;
                    break;
                }
                goto default;
                /* Pass through. */
            default:
                mark_index--;
                break;
        }
    }
}

/* Fill ctx.mark_char_map[]: a non-zero item at index of a character means
 * the character is enabled as a mark character. The extension characters
 * are enabled only when the respective parser flag is set.
 */
void md_build_mark_char_map(MD_CTX* ctx)
{
    memset(ctx.mark_char_map.ptr, 0, ctx.mark_char_map.length);

    /* Characters enabled regardless of the parser flags. */
    ctx.mark_char_map['\\'] = 1;
    ctx.mark_char_map['*'] = 1;
    ctx.mark_char_map['_'] = 1;
    ctx.mark_char_map['`'] = 1;
    ctx.mark_char_map['&'] = 1;
    ctx.mark_char_map[';'] = 1;
    ctx.mark_char_map['<'] = 1;
    ctx.mark_char_map['>'] = 1;
    ctx.mark_char_map['['] = 1;
    ctx.mark_char_map['!'] = 1;
    ctx.mark_char_map[']'] = 1;
    ctx.mark_char_map['\0'] = 1;

    if(ctx.parser.flags & MD_FLAG_STRIKETHROUGH)
        ctx.mark_char_map['~'] = 1;

    if(ctx.parser.flags & MD_FLAG_LATEXMATHSPANS)
        ctx.mark_char_map['$'] = 1;

    if(ctx.parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
        ctx.mark_char_map['@'] = 1;

    if(ctx.parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
        ctx.mark_char_map[':'] = 1;

    if(ctx.parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
        ctx.mark_char_map['.'] = 1;

    if(ctx.parser.flags & MD_FLAG_TABLES)
        ctx.mark_char_map['|'] = 1;

    if(ctx.parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
        int i;

        /* Enable every character for which ISWHITESPACE_() holds. */
        for(i = 0; i < cast(int) (ctx.mark_char_map).sizeof; i++) {
            if(ISWHITESPACE_(cast(CHAR)i))
                ctx.mark_char_map[i] = 1;
        }
    }
}

/* We limit code span marks to at most 32 backticks. This solves the
 * pathologic case of too many openers, each of different length: Their
 * resolving would be then O(n^2).
*/
enum CODESPAN_MARK_MAXLEN = 32;

/* Check whether a code span starts at the backtick run beginning at 'beg'.
 *
 * Params:
 *   lines, n_lines          = lines to scan; lines[0] must contain 'beg'.
 *   beg                     = offset of the first backtick of the opener.
 *   p_opener_beg/_end       = receive the opener mark; *p_opener_end is set
 *                             to the end of the backtick run even on failure.
 *   p_closer_beg/_end       = receive the closer mark (on success only).
 *   last_potential_closers  = array of CODESPAN_MARK_MAXLEN offsets; entry
 *                             [len-1] remembers the latest seen backtick run
 *                             of length 'len' which did not match. It is
 *                             shared between calls to avoid useless re-scans.
 *   p_reached_paragraph_end = set to TRUE once a scan ran till the end of
 *                             the last line without finding a closer.
 *
 * Returns: TRUE if a closer of the same length has been found, else FALSE.
 */
int md_is_code_span(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg,
                    OFF* p_opener_beg, OFF* p_opener_end,
                    OFF* p_closer_beg, OFF* p_closer_end,
                    OFF* last_potential_closers,
                    int* p_reached_paragraph_end)
{
    OFF opener_beg = beg;
    OFF opener_end;
    OFF closer_beg;
    OFF closer_end;
    SZ mark_len;
    OFF line_end;
    int has_space_after_opener = FALSE;
    int has_eol_after_opener = FALSE;
    int has_space_before_closer = FALSE;
    int has_eol_before_closer = FALSE;
    int has_only_space = TRUE;
    int line_index = 0;

    line_end = lines[0].end;
    opener_end = opener_beg;
    while(opener_end < line_end && ctx.CH(opener_end) == '`')
        opener_end++;
    has_space_after_opener = (opener_end < line_end && ctx.CH(opener_end) == ' ');
    has_eol_after_opener = (opener_end == line_end);

    /* The caller needs to know end of the opening mark even if we fail. */
    *p_opener_end = opener_end;

    mark_len = opener_end - opener_beg;
    if(mark_len > CODESPAN_MARK_MAXLEN)
        return FALSE;

    /* Check whether we already know there is no closer of this length.
     * If so, a re-scan makes no sense. This fixes issue #59. */
    if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end ||
       (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end))
        return FALSE;

    closer_beg = opener_end;
    closer_end = opener_end;

    /* Find closer mark. */
    while(TRUE) {
        while(closer_beg < line_end && ctx.CH(closer_beg) != '`') {
            if(ctx.CH(closer_beg) != ' ')
                has_only_space = FALSE;
            closer_beg++;
        }
        closer_end = closer_beg;
        while(closer_end < line_end && ctx.CH(closer_end) == '`')
            closer_end++;

        if(closer_end - closer_beg == mark_len) {
            /* Success. */
            has_space_before_closer = (closer_beg > lines[line_index].beg && ctx.CH(closer_beg-1) == ' ');
            has_eol_before_closer = (closer_beg == lines[line_index].beg);
            break;
        }

        if(closer_end - closer_beg > 0) {
            /* We have found a back-tick which is not part of the closer. */
            has_only_space = FALSE;

            /* But if we eventually fail, remember it as a potential closer
             * of its own length for future attempts. This mitigates needs for
             * rescans.
             *
             * The bound is inclusive: openers of exactly CODESPAN_MARK_MAXLEN
             * backticks are accepted above and consult entry [MAXLEN-1], so
             * runs of that length have to be recorded as well. Otherwise,
             * after one scan reaching the paragraph end, such a code span
             * would never be recognized. */
            if(closer_end - closer_beg <= CODESPAN_MARK_MAXLEN) {
                if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
                    last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
            }
        }

        if(closer_end >= line_end) {
            line_index++;
            if(line_index >= n_lines) {
                /* Reached end of the paragraph and still nothing. */
                *p_reached_paragraph_end = TRUE;
                return FALSE;
            }
            /* Try on the next line. */
            line_end = lines[line_index].end;
            closer_beg = lines[line_index].beg;
        } else {
            closer_beg = closer_end;
        }
    }

    /* If there is a space or a new line both after the opener and before the
     * closer (and if the code span is not made of spaces only), consume one
     * initial and one trailing space as part of the marks. */
    if(!has_only_space &&
       (has_space_after_opener || has_eol_after_opener) &&
       (has_space_before_closer || has_eol_before_closer))
    {
        if(has_space_after_opener)
            opener_end++;
        else
            opener_end = lines[1].beg;

        if(has_space_before_closer)
            closer_beg--;
        else {
            closer_beg = lines[line_index-1].end;
            /* We need to eat the preceding "\r\n" but not any line trailing
             * spaces. */
            while(closer_beg < ctx.size && ctx.ISBLANK(closer_beg))
                closer_beg++;
        }
    }

    *p_opener_beg = opener_beg;
    *p_opener_end = opener_end;
    *p_closer_beg = closer_beg;
    *p_closer_end = closer_end;
    return TRUE;
}

/* Check whether an URI autolink ("<scheme:path>") starts at 'beg', which
 * must refer to '<'. The whole autolink has to end before 'max_end'.
 *
 * Returns: TRUE and sets *p_end just past the closing '>' on success,
 *          FALSE otherwise (*p_end is then left untouched).
 */
static int
md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg+1;

    assert(ctx.CH(beg) == '<');

    /* Check for scheme.
     * NOTE(review): CommonMark requires the scheme to begin with an ASCII
     * letter; ISASCII() looks more permissive than that -- confirm. */
    if(off >= max_end || !ctx.ISASCII(off))
        return FALSE;
    off++;
    while(1) {
        if(off >= max_end)
            return FALSE;
        /* Here (off - beg - 1) scheme characters have been consumed. The
         * scheme may be 2 to 32 characters long, so the ':' may still sit
         * at beg + 33. */
        if(off - beg > 33)
            return FALSE;
        if(ctx.CH(off) == ':' && off - beg >= 3)
            break;
        if(!ctx.ISALNUM(off) && ctx.CH(off) != '+' && ctx.CH(off) != '-' && ctx.CH(off) != '.')
            return FALSE;
        off++;
    }

    /* Check the path after the scheme. */
    while(off < max_end && ctx.CH(off) != '>') {
        if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off) || ctx.CH(off) == '<')
            return FALSE;
        off++;
    }

    if(off >= max_end)
        return FALSE;

    assert(ctx.CH(off) == '>');
    *p_end = off+1;
    return TRUE;
}

/* Check whether an e-mail autolink ("<user@domain>") starts at 'beg', which
 * must refer to '<'. The whole autolink has to end before 'max_end'.
 *
 * Returns: TRUE and sets *p_end just past the closing '>' on success,
 *          FALSE otherwise (*p_end is then left untouched).
 */
static int
md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg + 1;
    int label_len;

    assert(ctx.CH(beg) == '<');

    /* The code should correspond to this regexp:
            /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
            @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
            (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
     */

    /* Username (before '@'). */
    while(off < max_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, ".!#$%&'*+/=?^_`{|}~-")))
        off++;
    if(off <= beg+1)
        return FALSE;

    /* '@' */
    if(off >= max_end || ctx.CH(off) != '@')
        return FALSE;
    off++;

    /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
     * characters or '-' (1 + {0,61} + 1 in the regexp above), but '-' is not
     * allowed as first or last char. */
    label_len = 0;
    while(off < max_end) {
        if(ctx.ISALNUM(off))
            label_len++;
        else if(ctx.CH(off) == '-' && label_len > 0)
            label_len++;
        else if(ctx.CH(off) == '.' && label_len > 0 && ctx.CH(off-1) != '-')
            label_len = 0;
        else
            break;

        if(label_len > 63)
            return FALSE;

        off++;
    }

    if(label_len <= 0 || off >= max_end || ctx.CH(off) != '>' || ctx.CH(off-1) == '-')
        return FALSE;

    *p_end = off+1;
    return TRUE;
}

/* Check whether any autolink starts at 'beg' (which must refer to '<').
 *
 * On success, *p_end is set just past the closing '>' and *p_missing_mailto
 * is TRUE for an e-mail autolink and FALSE for an URI autolink.
 */
static int
md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
{
    if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
        *p_missing_mailto = FALSE;
        return TRUE;
    }

    if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
        *p_missing_mailto = TRUE;
        return TRUE;
    }

    return FALSE;
}

/* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
bool IS_MARK_CHAR(MD_CTX* ctx, OFF off)
{
    return (ctx.mark_char_map[cast(ubyte) ctx.CH(off)]) != 0;
}

/* Scan the given lines and push a mark into ctx.marks for every character
 * sequence which may have a special meaning.
 *
 * Returns: 0 on success, or the non-zero value returned by a failed
 *          PUSH_MARK().
 */
int md_collect_marks(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, int table_mode)
{
    int i;
    int ret = 0;
    MD_MARK* mark;
    OFF[CODESPAN_MARK_MAXLEN] codespan_last_potential_closers =
    [   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];

    int codespan_scanned_till_paragraph_end = FALSE;

    for(i = 0; i < n_lines; i++)
    {
        const(MD_LINE)* line = &lines[i];
        OFF off = line.beg;
        OFF line_end = line.end;

        while(true)
        {
            CHAR ch;

            /* Optimization: Use some loop unrolling.
*/
            while(off + 3 < line_end && !IS_MARK_CHAR(ctx, off+0) && !IS_MARK_CHAR(ctx, off+1)
                                     && !IS_MARK_CHAR(ctx, off+2) && !IS_MARK_CHAR(ctx, off+3))
                off += 4;
            while(off < line_end && !IS_MARK_CHAR(ctx, off+0))
                off++;

            if(off >= line_end)
                break;

            ch = ctx.CH(off);

            /* A backslash escape.
             * It can go beyond line.end as it may involve escaped new
             * line to form a hard break. */
            if(ch == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) {
                /* Hard-break cannot be on the last line of the block. */
                if(!ctx.ISNEWLINE(off+1) || i+1 < n_lines)
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, off+2, MD_MARK_RESOLVED);
                    if (ret != 0) goto abort;
                }
                off += 2;
                continue;
            }

            /* A potential (string) emphasis start/end. */
            if(ch == '*' || ch == '_') {
                OFF tmp = off+1;
                int left_level;     /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
                int right_level;    /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */

                while(tmp < line_end && ctx.CH(tmp) == ch)
                    tmp++;

                if(off == line.beg || ctx.ISUNICODEWHITESPACEBEFORE(off))
                    left_level = 0;
                else if(ctx.ISUNICODEPUNCTBEFORE(off))
                    left_level = 1;
                else
                    left_level = 2;

                if(tmp == line_end || ctx.ISUNICODEWHITESPACE(tmp))
                    right_level = 0;
                else if(ctx.ISUNICODEPUNCT(tmp))
                    right_level = 1;
                else
                    right_level = 2;

                /* Intra-word underscore doesn't have special meaning. */
                if(ch == '_' && left_level == 2 && right_level == 2) {
                    left_level = 0;
                    right_level = 0;
                }

                if(left_level != 0 || right_level != 0) {
                    uint flags = 0;

                    if(left_level > 0 && left_level >= right_level)
                        flags |= MD_MARK_POTENTIAL_CLOSER;
                    if(right_level > 0 && right_level >= left_level)
                        flags |= MD_MARK_POTENTIAL_OPENER;
                    if(left_level == 2 && right_level == 2)
                        flags |= MD_MARK_EMPH_INTRAWORD;

                    /* For "the rule of three" we need to remember the original
                     * size of the mark (modulo three), before we potentially
                     * split the mark when being later resolved partially by some
                     * shorter closer. */
                    switch((tmp - off) % 3)
                    {
                        case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
                        case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
                        case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
                        default: break;
                    }

                    ret = PUSH_MARK(ctx, &mark, ch, off, tmp, flags);
                    if (ret != 0) goto abort;

                    /* During resolving, multiple asterisks may have to be
                     * split into independent span start/ends. Consider e.g.
                     * "**foo* bar*". Therefore we push also some empty dummy
                     * marks to have enough space for that. */
                    off++;
                    while(off < tmp) {
                        ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                        if (ret != 0) goto abort;
                        off++;
                    }
                    continue;
                }

                off = tmp;
                continue;
            }

            /* A potential code span start/end. */
            if(ch == '`') {
                OFF opener_beg, opener_end;
                OFF closer_beg, closer_end;
                int is_code_span;

                is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off,
                                    &opener_beg, &opener_end, &closer_beg, &closer_end,
                                    codespan_last_potential_closers.ptr,
                                    &codespan_scanned_till_paragraph_end);
                if(is_code_span) {
                    ret = PUSH_MARK(ctx, &mark, '`', opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
                    if (ret != 0) goto abort;
                    ret = PUSH_MARK(ctx, &mark, '`', closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                    if (ret != 0) goto abort;
                    ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1;
                    ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2;

                    off = closer_end;

                    /* Advance the current line accordingly. */
                    while(off > line_end) {
                        i++;
                        line++;
                        line_end = line.end;
                    }
                    continue;
                }

                off = opener_end;
                continue;
            }

            /* A potential entity start. */
            if(ch == '&') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            /* A potential entity end. */
            if(ch == ';') {
                /* We surely cannot be entity unless the previous mark is '&'. */
                if(ctx.n_marks > 0 && ctx.marks[ctx.n_marks-1].ch == '&')
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                    if (ret != 0) goto abort;
                }

                off++;
                continue;
            }

            /* A potential autolink or raw HTML start/end. */
            if(ch == '<') {
                int is_autolink;
                OFF autolink_end;
                int missing_mailto;

                if(!(ctx.parser.flags & MD_FLAG_NOHTMLSPANS)) {
                    int is_html;
                    OFF html_end;

                    /* Given the nature of the raw HTML, we have to recognize
                     * it here. Doing so later in md_analyze_lt_gt() could
                     * open can of worms of quadratic complexity. */
                    is_html = md_is_html_any(ctx, lines + i, n_lines - i, off,
                                    lines[n_lines-1].end, &html_end);
                    if(is_html) {
                        ret = PUSH_MARK(ctx, &mark, '<', off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        if (ret != 0) goto abort;
                        ret = PUSH_MARK(ctx, &mark, '>', html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        if (ret != 0) goto abort;
                        ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1;
                        ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2;
                        off = html_end;

                        /* Advance the current line accordingly. */
                        while(off > line_end) {
                            i++;
                            line++;
                            line_end = line.end;
                        }
                        continue;
                    }
                }

                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
                                    &autolink_end, &missing_mailto);
                if(is_autolink) {
                    ret = PUSH_MARK(ctx, &mark, (missing_mailto ? '@' : '<'), off, off+1,
                                    MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
                    if (ret != 0) goto abort;
                    ret = PUSH_MARK(ctx, &mark, '>', autolink_end-1, autolink_end,
                                    MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
                    if (ret != 0) goto abort;
                    ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1;
                    ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2;
                    off = autolink_end;
                    continue;
                }

                off++;
                continue;
            }

            /* A potential link or its part. */
            if(ch == '[' || (ch == '!' && off+1 < line_end && ctx.CH(off+1) == '[')) {
                OFF tmp = (ch == '[' ? off+1 : off+2);
                ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
                if (ret != 0) goto abort;
                off = tmp;
                /* Two dummies to make enough place for data we need if it is
                 * a link. */
                ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                if (ret != 0) goto abort;
                ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                if (ret != 0) goto abort;
                continue;
            }
            if(ch == ']') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            /* A potential permissive e-mail autolink. */
            if(ch == '@') {
                if(line.beg + 1 <= off && ctx.ISALNUM(off-1) &&
                    off + 3 < line.end && ctx.ISALNUM(off+1))
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                    if (ret != 0) goto abort;
                    /* Push a dummy as a reserve for a closer. */
                    ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                    if (ret != 0) goto abort;
                }

                off++;
                continue;
            }

            /* A potential permissive URL autolink. */
            if(ch == ':')
            {
                static struct Scheme
                {
                    const(CHAR)* scheme;
                    SZ scheme_size;
                    const(CHAR)* suffix;
                    SZ suffix_size;
                }

                static immutable Scheme[] scheme_map =
                [
                    Scheme("http", 4,    "//", 2),
                    Scheme("https", 5,   "//", 2),
                    Scheme("ftp", 3,     "//", 2)
                ];

                int scheme_index;
                bool scheme_matched = false;

                for(scheme_index = 0; scheme_index < cast(int) (scheme_map.length); scheme_index++) {
                    const(CHAR)* scheme = scheme_map[scheme_index].scheme;
                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
                    const(CHAR)* suffix = scheme_map[scheme_index].suffix;
                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;

                    if(line.beg + scheme_size <= off && md_ascii_eq(ctx.STR(off-scheme_size), scheme, scheme_size) &&
                        (line.beg + scheme_size == off || ctx.ISWHITESPACE(off-scheme_size-1) || ctx.ISANYOF(off-scheme_size-1, "*_~([")) &&
                        off + 1 + suffix_size < line.end && md_ascii_eq(ctx.STR(off+1), suffix, suffix_size))
                    {
                        ret = PUSH_MARK(ctx, &mark, ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
                        if (ret != 0) goto abort;
                        /* Push a dummy as a reserve for a closer. */
                        ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                        if (ret != 0) goto abort;
                        off += 1 + suffix_size;
                        /* Leave the scheme loop. A plain 'continue' here would
                         * only continue this inner 'for' loop and then fall
                         * into the 'off++' below, so that the first character
                         * after the suffix would never be examined. */
                        scheme_matched = true;
                        break;
                    }
                }

                if(!scheme_matched)
                    off++;
                continue;
            }

            /* A potential permissive WWW autolink. */
            if(ch == '.') {
                if(line.beg + 3 <= off && md_ascii_eq(ctx.STR(off-3), "www", 3) &&
                    (line.beg + 3 == off || ctx.ISWHITESPACE(off-4) || ctx.ISANYOF(off-4, "*_~([")) &&
                    off + 1 < line_end)
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
                    if (ret != 0) goto abort;
                    /* Push a dummy as a reserve for a closer. */
                    ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                    if (ret != 0) goto abort;
                    off++;
                    continue;
                }

                off++;
                continue;
            }

            /* A potential table cell boundary. */
            if(table_mode && ch == '|') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, 0);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            /* A potential strikethrough start/end. */
            if(ch == '~') {
                OFF tmp = off+1;

                while(tmp < line_end && ctx.CH(tmp) == '~')
                    tmp++;

                ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
                if (ret != 0) goto abort;
                off = tmp;
                continue;
            }

            /* A potential equation start/end */
            if(ch == '$') {
                /* We can have at most two consecutive $ signs,
                 * where two dollar signs signify a display equation. */
                OFF tmp = off+1;

                while(tmp < line_end && ctx.CH(tmp) == '$')
                    tmp++;

                if (tmp - off <= 2)
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
                    if (ret != 0) goto abort;
                }
                off = tmp;
                continue;
            }

            /* Turn non-trivial whitespace into single space. */
            if(ISWHITESPACE_(ch)) {
                OFF tmp = off+1;

                while(tmp < line_end && ctx.ISWHITESPACE(tmp))
                    tmp++;

                if(tmp - off > 1 || ch != ' ')
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_RESOLVED);
                    if (ret != 0) goto abort;
                }

                off = tmp;
                continue;
            }

            /* null character. */
            if(ch == '\0') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_RESOLVED);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            off++;
        }
    }

    /* Add a dummy mark at the end of the mark vector to simplify
     * process_inlines(). */
    ret = PUSH_MARK(ctx, &mark, 127, ctx.size, ctx.size, MD_MARK_RESOLVED);
    if (ret != 0) goto abort;

abort:
    return ret;
}

/* Handle a bracket mark: a potential opener is appended to the chain of
 * bracket openers; any other bracket mark is paired with the most recent
 * opener (if there is one) and the pair is queued for md_resolve_links(). */
static void
md_analyze_bracket(MD_CTX* ctx, int mark_index)
{
    /* We cannot really resolve links here as for that we would need
     * more context. E.g. a following pair of brackets (reference link),
     * or enclosing pair of brackets (if the inner is the link, the outer
     * one cannot be.)
     *
     * Therefore we here only construct a list of resolved '[' ']' pairs
     * ordered by position of the closer. This allows us to analyze what is
     * or is not link in the right order, from inside to outside in case
     * of nested brackets.
     *
     * The resolving itself is deferred into md_resolve_links().
     */

    MD_MARK* mark = &ctx.marks[mark_index];

    if(mark.flags & MD_MARK_POTENTIAL_OPENER) {
        md_mark_chain_append(ctx, ctx.BRACKET_OPENERS, mark_index);
        return;
    }

    if(ctx.BRACKET_OPENERS.tail >= 0) {
        /* Pop the opener from the chain.
*/
        int opener_index = ctx.BRACKET_OPENERS.tail;
        MD_MARK* opener = &ctx.marks[opener_index];

        if(opener.prev < 0)
            ctx.BRACKET_OPENERS.head = -1;
        else
            ctx.marks[opener.prev].next = -1;
        ctx.BRACKET_OPENERS.tail = opener.prev;

        /* Interconnect the opener and closer. */
        opener.next = mark_index;
        mark.prev = opener_index;

        /* Add the pair into chain of potential links for md_resolve_links().
         * Note we misuse opener.prev for this as opener.next points to its
         * closer. */
        if(ctx.unresolved_link_tail < 0)
            ctx.unresolved_link_head = opener_index;
        else
            ctx.marks[ctx.unresolved_link_tail].prev = opener_index;
        ctx.unresolved_link_tail = opener_index;
        opener.prev = -1;
    }
}

/* Forward declaration. */
static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                                     int mark_beg, int mark_end);

/* Walk the list of bracket pairs built by md_analyze_bracket() (linked
 * through the 'prev' members of the openers, starting at
 * ctx.unresolved_link_head) and decide for each pair whether it forms
 * a link or an image.
 *
 * Returns: 0 on success, -1 if any of the link checks reports an error.
 */
static int
md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    OFF last_link_beg = 0;
    OFF last_link_end = 0;
    OFF last_img_beg = 0;
    OFF last_img_end = 0;
    int cursor = ctx.unresolved_link_head;

    while(cursor >= 0) {
        const int opener_index = cursor;
        MD_MARK* opener = &ctx.marks[opener_index];
        const int closer_index = opener.next;
        MD_MARK* closer = &ctx.marks[closer_index];
        const int next_index = opener.prev;
        MD_MARK* next_opener = null;
        MD_MARK* next_closer = null;
        MD_LINK_ATTR attr;
        int is_link = FALSE;

        /* Whatever happens to this pair, the following iteration deals
         * with the next pair in the list. */
        cursor = next_index;

        if(next_index >= 0) {
            next_opener = &ctx.marks[next_index];
            next_closer = &ctx.marks[next_opener.next];
        }

        /* If nested ("[ [ ] ]"), we need to make sure that:
         *   - The outer does not end inside of (...) belonging to the inner.
         *   - The outer cannot be link if the inner is link (i.e. not image).
         *
         * (Note we here analyze from inner to outer as the marks are ordered
         * by closer.beg.)
         */
        if((opener.beg < last_link_beg && closer.end < last_link_end) ||
           (opener.beg < last_img_beg && closer.end < last_img_end) ||
           (opener.beg < last_link_end && opener.ch == '['))
        {
            continue;
        }

        if(next_opener != null && next_opener.beg == closer.end) {
            /* Another bracket pair follows immediately. If it has some
             * contents, this might be a full reference link; otherwise it
             * might be a shortcut reference link. */
            const bool maybe_full_reference = (next_closer.beg > closer.end + 1);
            const OFF label_beg = (maybe_full_reference ? next_opener.beg : opener.beg);
            const OFF label_end = (maybe_full_reference ? next_closer.end : closer.end);

            is_link = md_is_link_reference(ctx, lines, n_lines, label_beg, label_end, &attr);
            if(is_link < 0)
                return -1;

            if(is_link) {
                /* Eat the 2nd "[...]". */
                closer.end = next_closer.end;
            }
        } else {
            if(closer.end < ctx.size && ctx.CH(closer.end) == '(') {
                /* Might be inline link. */
                OFF inline_link_end = uint.max;

                is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer.end, &inline_link_end, &attr);
                if(is_link < 0)
                    return -1;

                /* Check the closing ')' is not inside an already resolved range
                 * (i.e. a range with a higher priority), e.g. a code span. */
                if(is_link) {
                    int probe = closer_index + 1;

                    while(probe < ctx.n_marks) {
                        MD_MARK* probed = &ctx.marks[probe];
                        const resolved_opener = MD_MARK_OPENER | MD_MARK_RESOLVED;

                        if(probed.beg >= inline_link_end)
                            break;

                        if((probed.flags & resolved_opener) != resolved_opener) {
                            probe++;
                            continue;
                        }

                        if(ctx.marks[probed.next].beg >= inline_link_end) {
                            /* Cancel the link status. */
                            if(attr.title_needs_free)
                                free(cast(void*)(attr.title));
                            is_link = FALSE;
                            break;
                        }

                        /* Skip the whole resolved range. */
                        probe = probed.next + 1;
                    }
                }

                if(is_link) {
                    /* Eat the "(...)" */
                    closer.end = inline_link_end;
                }
            }

            if(!is_link) {
                /* Might be collapsed reference link. */
                is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr);
                if(is_link < 0)
                    return -1;
            }
        }

        if(!is_link)
            continue;

        /* Resolve the brackets as a link. */
        opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
        closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;

        /* The destination and the title are stored in the two dummy marks
         * following the opener. */
        assert(ctx.marks[opener_index+1].ch == 'D');
        ctx.marks[opener_index+1].beg = attr.dest_beg;
        ctx.marks[opener_index+1].end = attr.dest_end;

        assert(ctx.marks[opener_index+2].ch == 'D');
        md_mark_store_ptr(ctx, opener_index+2, attr.title);
        if(attr.title_needs_free)
            md_mark_chain_append(ctx, ctx.PTR_CHAIN, opener_index+2);
        ctx.marks[opener_index+2].prev = attr.title_size;

        /* Remember the extent of the last link/image for the nesting checks
         * at the top of the loop. */
        if(opener.ch == '[') {
            last_link_beg = opener.beg;
            last_link_end = closer.end;
        } else {
            last_img_beg = opener.beg;
            last_img_end = closer.end;
        }

        md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
    }

    return 0;
}

/* Analyze whether the mark '&' starts a HTML entity.
 * If so, update its flags as well as flags of corresponding closer ';'. */
static void
md_analyze_entity(MD_CTX* ctx, int mark_index)
{
    MD_MARK* opener = &ctx.marks[mark_index];
    MD_MARK* closer;
    OFF off;

    /* Cannot be entity if there is no closer as the next mark.
* (Any other mark between would mean strange character which cannot be
     * part of the entity.)
     *
     * So we can do all the work on '&' and do not call this later for the
     * closing mark ';'.
     */
    if(mark_index + 1 >= ctx.n_marks)
        return;
    closer = &ctx.marks[mark_index+1];
    if(closer.ch != ';')
        return;

    if(!md_is_entity(ctx, opener.beg, closer.end, &off))
        return;

    assert(off == closer.end);

    md_resolve_range(ctx, null, mark_index, mark_index+1);
    opener.end = closer.end;
}

/* Flag the table cell boundary mark as resolved, append it to the chain of
 * table cell boundaries and count it. */
static void
md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
{
    ctx.marks[mark_index].flags |= MD_MARK_RESOLVED;

    md_mark_chain_append(ctx, ctx.TABLECELLBOUNDARIES, mark_index);
    ctx.n_table_cell_boundaries++;
}

/* Split a longer mark into two. The new mark takes the given count of
 * characters. May only be called if an adequate number of dummy 'D' marks
 * follows.
 *
 * Returns the index of the new mark, which covers the last 'n' characters
 * of the original one.
 */
static int
md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
{
    MD_MARK* original = &ctx.marks[mark_index];
    int new_mark_index = mark_index + (original.end - original.beg - n);
    MD_MARK* split_off = &ctx.marks[new_mark_index];

    assert(original.end - original.beg > n);
    assert(split_off.ch == 'D');

    /* The dummy becomes a copy of the original mark; afterwards the
     * original is shortened and the copy starts where the original ends. */
    memcpy(split_off, original, MD_MARK.sizeof);
    original.end -= n;
    split_off.beg = original.end;

    return new_mark_index;
}

/* Analyze an emphasis mark: if it can be a closer, try to pair it with
 * a preceding opener; if that is not possible and it can be an opener, add
 * it to its chain of openers. */
static void
md_analyze_emph(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);

    /* If we can be a closer, try to resolve with the preceding opener. */
    if(mark.flags & MD_MARK_POTENTIAL_CLOSER) {
        MD_MARK* opener = null;
        int opener_index;

        if(mark.ch != '*') {
            /* Simple emph. mark */
            if(chain.tail >= 0) {
                opener_index = chain.tail;
                opener = &ctx.marks[opener_index];
            }
        } else {
            MD_MARKCHAIN*[6] candidates;
            int n_candidates = 0;
            const uint closer_flags = mark.flags;
            const uint mod3 = closer_flags & MD_MARK_EMPH_MOD3_MASK;
            const bool extraword = !(closer_flags & MD_MARK_EMPH_INTRAWORD);

            /* Apply "rule of three". (This is why we break asterisk opener
             * marks into multiple chains.) */
            candidates[n_candidates++] = ctx.ASTERISK_OPENERS_intraword_mod3_0;
            if(mod3 != MD_MARK_EMPH_MOD3_2)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_intraword_mod3_1;
            if(mod3 != MD_MARK_EMPH_MOD3_1)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_intraword_mod3_2;
            candidates[n_candidates++] = ctx.ASTERISK_OPENERS_extraword_mod3_0;
            if(extraword || mod3 != MD_MARK_EMPH_MOD3_2)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_extraword_mod3_1;
            if(extraword || mod3 != MD_MARK_EMPH_MOD3_1)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_extraword_mod3_2;

            /* Opener is the most recent mark from the allowed chains. */
            for(int k = 0; k < n_candidates; k++) {
                const int tail_index = candidates[k].tail;

                if(tail_index < 0)
                    continue;

                MD_MARK* tail_mark = &ctx.marks[tail_index];
                if(opener == null || tail_mark.end > opener.end) {
                    opener_index = tail_index;
                    opener = tail_mark;
                }
            }
        }

        /* Resolve, if we have found matching opener. */
        if(opener != null) {
            const SZ opener_size = opener.end - opener.beg;
            const SZ closer_size = mark.end - mark.beg;

            /* Make both marks equally long by splitting the longer one. */
            if(opener_size > closer_size) {
                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
                md_mark_chain_append(ctx, md_mark_chain(ctx, opener_index), opener_index);
            } else if(opener_size < closer_size) {
                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
            }

            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
            md_resolve_range(ctx, chain, opener_index, mark_index);
            return;
        }
    }

    /* If we could not resolve as closer, we may yet be an opener. */
    if(mark.flags & MD_MARK_POTENTIAL_OPENER)
        md_mark_chain_append(ctx, chain, mark_index);
}

/* Analyze a tilde (strikethrough) mark: pair it with the pending opener if
 * there is one, otherwise remember it as an opener. */
static void
md_analyze_tilde(MD_CTX* ctx, int mark_index)
{
    /* We attempt to be Github Flavored Markdown compatible here. GFM says
     * that length of the tilde sequence is not important at all. Note that
     * implies the ctx.TILDE_OPENERS chain can have at most one item. */

    if(ctx.TILDE_OPENERS.head < 0) {
        /* We can only be opener. */
        md_mark_chain_append(ctx, ctx.TILDE_OPENERS, mark_index);
        return;
    }

    /* The chain already contains an opener, so we may resolve the span. */
    const int opener_index = ctx.TILDE_OPENERS.head;

    md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
    md_resolve_range(ctx, ctx.TILDE_OPENERS, opener_index, mark_index);
}

static void
md_analyze_dollar(MD_CTX* ctx, int mark_index)
{
    /* This should mimic the way inline equations work in LaTeX, so there
     * can only ever be one item in the chain (i.e. the dollars can't be
     * nested). This is basically the same as the md_analyze_tilde function,
     * except that we require matching openers and closers to be of the same
     * length.
4054 * 4055 * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */ 4056 if(ctx.DOLLAR_OPENERS.head >= 0) { 4057 /* If the potential closer has a non-matching number of $, discard */ 4058 MD_MARK* open = &ctx.marks[ctx.DOLLAR_OPENERS.head]; 4059 MD_MARK* close = &ctx.marks[mark_index]; 4060 4061 int opener_index = ctx.DOLLAR_OPENERS.head; 4062 md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL); 4063 if (open.end - open.beg == close.end - close.beg) { 4064 /* We are the matching closer */ 4065 md_resolve_range(ctx, ctx.DOLLAR_OPENERS, opener_index, mark_index); 4066 } else { 4067 /* We don't match the opener, so discard old opener and insert as opener */ 4068 md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index); 4069 } 4070 } else { 4071 /* No unmatched openers, so we are opener */ 4072 md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index); 4073 } 4074 } 4075 4076 static void 4077 md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) 4078 { 4079 MD_MARK* opener = &ctx.marks[mark_index]; 4080 int closer_index = mark_index + 1; 4081 MD_MARK* closer = &ctx.marks[closer_index]; 4082 MD_MARK* next_resolved_mark; 4083 OFF off = opener.end; 4084 int n_dots = FALSE; 4085 int has_underscore_in_last_seg = FALSE; 4086 int has_underscore_in_next_to_last_seg = FALSE; 4087 int n_opened_parenthesis = 0; 4088 4089 /* Check for domain. */ 4090 while(off < ctx.size) { 4091 if(ctx.ISALNUM(off) || ctx.CH(off) == '-') { 4092 off++; 4093 } else if(ctx.CH(off) == '.') { 4094 /* We must see at least one period. */ 4095 n_dots++; 4096 has_underscore_in_next_to_last_seg = has_underscore_in_last_seg; 4097 has_underscore_in_last_seg = FALSE; 4098 off++; 4099 } else if(ctx.CH(off) == '_') { 4100 /* No underscore may be present in the last two domain segments. 
*/
            has_underscore_in_last_seg = TRUE;
            off++;
        } else {
            break;
        }
    }
    if(off > opener.end && ctx.CH(off-1) == '.') {
        off--;
        n_dots--;
    }
    if(off <= opener.end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg)
        return;

    /* Check for path. */
    next_resolved_mark = closer + 1;
    while(next_resolved_mark.ch == 'D' || !(next_resolved_mark.flags & MD_MARK_RESOLVED))
        next_resolved_mark++;
    while(off < next_resolved_mark.beg && ctx.CH(off) != '<' && !ctx.ISWHITESPACE(off) && !ctx.ISNEWLINE(off)) {
        /* Parenthesis must be balanced. */
        if(ctx.CH(off) == '(') {
            n_opened_parenthesis++;
        } else if(ctx.CH(off) == ')') {
            if(n_opened_parenthesis > 0)
                n_opened_parenthesis--;
            else
                break;
        }

        off++;
    }
    /* These cannot be last char In such case they are more likely normal
     * punctuation. */
    if(ctx.ISANYOF(off-1, "?!.,:*_~"))
        off--;

    /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
     * length so all the contents becomes the link text. */
    assert(closer.ch == 'D');
    opener.end = opener.beg;
    closer.ch = opener.ch;
    closer.beg = off;
    closer.end = off;
    md_resolve_range(ctx, null, mark_index, closer_index);
}

/* Permissive e-mail autolink, anchored at an '@' mark.
 *
 * Such links need no enclosing '<' '>', so in exchange the accepted address
 * syntax is strict: the user name may consist of alphanumerics and ".-_+",
 * the domain of alphanumerics and ".-_". The domain must contain at least
 * one period and must not end with '-' or '_'; a single trailing '.' is
 * treated as ordinary punctuation and left out of the link. */
static void
md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
{
    MD_MARK* at_mark = &ctx.marks[mark_index];
    OFF name_beg = at_mark.beg;
    OFF domain_end = at_mark.end;
    int n_periods = 0;

    assert(ctx.CH(name_beg) == '@');

    /* Walk backwards over the user name. */
    for(; name_beg > 0; name_beg--) {
        if(!ctx.ISALNUM(name_beg-1) && !ctx.ISANYOF(name_beg-1, ".-_+"))
            break;
    }

    /* Walk forwards over the domain, counting the periods on the way. */
    for(; domain_end < ctx.size; domain_end++) {
        if(!ctx.ISALNUM(domain_end) && !ctx.ISANYOF(domain_end, ".-_"))
            break;
        if(ctx.CH(domain_end) == '.')
            n_periods++;
    }

    if(ctx.CH(domain_end-1) == '.') {
        /* A final '.' does not belong to the address. */
        n_periods--;
        domain_end--;
    } else if(ctx.ISANYOF2(domain_end-1, '-', '_')) {
        /* These are forbidden at the end. */
        return;
    }

    /* Reject an empty domain and a domain without any period. */
    if(ctx.CH(domain_end-1) == '@' || n_periods == 0)
        return;

    /* Accept it as an auto-link: shrink the opener and the dummy closer to
     * zero length around the address so the whole address is link text. */
    const int closer_index = mark_index + 1;
    MD_MARK* closer = &ctx.marks[closer_index];
    assert(closer.ch == 'D');

    at_mark.beg = name_beg;
    at_mark.end = name_beg;
    closer.ch = at_mark.ch;
    closer.beg = domain_end;
    closer.end = domain_end;
    md_resolve_range(ctx, null, mark_index, closer_index);
}

static void
md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                 int mark_beg, int mark_end, const(CHAR)* mark_chars)
{
    int i = mark_beg;

    while(i < mark_end) {
        MD_MARK* mark = &ctx.marks[i];

        /* Skip resolved spans. */
        if(mark.flags & MD_MARK_RESOLVED) {
            if(mark.flags & MD_MARK_OPENER) {
                assert(i < mark.next);
                i = mark.next + 1;
            } else {
                i++;
            }
            continue;
        }

        /* Skip marks we do not want to deal with. */
        if(!ISANYOF_(mark.ch, mark_chars)) {
            i++;
            continue;
        }

        /* Analyze the mark. */
        switch(mark.ch) {
            case '[':   /* Pass through. */
            case '!':   /* Pass through. */
            case ']':   md_analyze_bracket(ctx, i); break;
            case '&':   md_analyze_entity(ctx, i); break;
            case '|':   md_analyze_table_cell_boundary(ctx, i); break;
            case '_':   /* Pass through.
*/
            case '*':   md_analyze_emph(ctx, i); break;
            case '~':   md_analyze_tilde(ctx, i); break;
            case '$':   md_analyze_dollar(ctx, i); break;
            case '.':   /* Pass through. */
            case ':':   md_analyze_permissive_url_autolink(ctx, i); break;
            case '@':   md_analyze_permissive_email_autolink(ctx, i); break;
            default:    break;
        }

        i++;
    }
}

/* Collect the marks of the given lines into ctx.marks and resolve them.
 *
 * The marks are analyzed group by group so that precedence between the
 * groups is respected. In table mode (exactly one line) only entities and
 * cell boundaries are analyzed.
 *
 * Returns a negative value if collecting marks or resolving links fails,
 * otherwise the result of the last of those steps that ran. */
static int
md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
{
    /* Forget the marks collected for any previous block. */
    ctx.n_marks = 0;

    int status = md_collect_marks(ctx, lines, n_lines, table_mode);
    if(status < 0)
        return status;

    /* Group 1: entities; code spans; autolinks; raw HTML. */
    md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "&");

    if(table_mode) {
        /* Group 2: table cell boundaries.
         * The ctx.TABLECELLBOUNDARIES chain is reset before the analysis
         * rather than after it, because the caller may need the result. */
        assert(n_lines == 1);
        ctx.TABLECELLBOUNDARIES.head = -1;
        ctx.TABLECELLBOUNDARIES.tail = -1;
        ctx.n_table_cell_boundaries = 0;
        md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "|");
        return status;
    }

    /* Group 3: links. */
    md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "[]!");
    status = md_resolve_links(ctx, lines, n_lines);
    if(status < 0)
        return status;
    ctx.BRACKET_OPENERS.head = -1;
    ctx.BRACKET_OPENERS.tail = -1;
    ctx.unresolved_link_head = -1;
    ctx.unresolved_link_tail = -1;

    /* Group 4: emphasis and strong emphasis; permissive autolinks. */
    md_analyze_link_contents(ctx, lines, n_lines, 0, ctx.n_marks);

    return status;
}

/* Analyze emphasis, strikethrough, math and permissive-autolink marks in the
 * mark range [mark_beg, mark_end), then empty all opener chains used by
 * that analysis so nothing is left pending for the next call. */
static void
md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                         int mark_beg, int mark_end)
{
    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, "*_~$@:.");

    MD_MARKCHAIN*[9] used_chains = [
        ctx.ASTERISK_OPENERS_extraword_mod3_0,
        ctx.ASTERISK_OPENERS_extraword_mod3_1,
        ctx.ASTERISK_OPENERS_extraword_mod3_2,
        ctx.ASTERISK_OPENERS_intraword_mod3_0,
        ctx.ASTERISK_OPENERS_intraword_mod3_1,
        ctx.ASTERISK_OPENERS_intraword_mod3_2,
        ctx.UNDERSCORE_OPENERS,
        ctx.TILDE_OPENERS,
        ctx.DOLLAR_OPENERS
    ];

    foreach(used_chain; used_chains) {
        used_chain.head = -1;
        used_chain.tail = -1;
    }
}

/* Fire the enter (enter != 0) or leave (enter == 0) span callback for a link
 * or an image, building the href and title attributes for its detail
 * structure. Both attribute builds are released before returning, on every
 * path.
 *
 * Returns a negative value if building an attribute fails, otherwise the
 * value returned by the span callback. */
static int
md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
                      const(CHAR)* dest, SZ dest_size, int prohibit_escapes_in_dest,
                      const(CHAR)* title, SZ title_size)
{
    MD_ATTRIBUTE_BUILD href_build = MD_ATTRIBUTE_BUILD.init;
    MD_ATTRIBUTE_BUILD title_build = MD_ATTRIBUTE_BUILD.init;
    MD_SPAN_A_DETAIL det;
    int ret = 0;

    /* Note we here rely on fact that MD_SPAN_A_DETAIL and
     * MD_SPAN_IMG_DETAIL are binary-compatible. */
    memset(&det, 0, MD_SPAN_A_DETAIL.sizeof);

    const uint href_flags = (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0);

    ret = md_build_attribute(ctx, dest, dest_size, href_flags, &det.href, &href_build);
    if(ret >= 0)
        ret = md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build);
    if(ret >= 0) {
        if(enter)
            ret = MD_ENTER_SPAN(ctx, type, &det);
        else
            ret = MD_LEAVE_SPAN(ctx, type, &det);
    }

    md_free_attribute(ctx, &href_build);
    md_free_attribute(ctx, &title_build);
    return ret;
}

/* Render the output, accordingly to the analyzed ctx.marks. */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    MD_TEXTTYPE text_type;
    const(MD_LINE)* line = lines;
    MD_MARK* prev_mark = null;
    MD_MARK* mark;
    OFF off = lines[0].beg;
    OFF end = lines[n_lines-1].end;
    int enforce_hardbreak = 0;
    int ret = 0;

    /* Find first resolved mark. Note there is always at least one resolved
     * mark, the dummy last one after the end of the latest line we actually
     * never really reach. This saves us of a lot of special checks and cases
     * in this function. */
    mark = ctx.marks;
    while(!(mark.flags & MD_MARK_RESOLVED))
        mark++;

    text_type = MD_TEXT_NORMAL;

    while(1) {
        /* Process the text up to the next mark or end-of-line. */
        OFF tmp = (line.end < mark.beg ? line.end : mark.beg);
        if(tmp > off) {
            ret = MD_TEXT(ctx, text_type, ctx.STR(off), tmp - off);
            if (ret != 0) goto abort;
            off = tmp;
        }

        /* If reached the mark, process it and move to next one. */
        if(off >= mark.beg) {
            switch(mark.ch) {
                case '\\':      /* Backslash escape. */
                    if(ctx.ISNEWLINE(mark.beg+1))
                        enforce_hardbreak = 1;
                    else
                    {
                        ret = MD_TEXT(ctx, text_type, ctx.STR(mark.beg+1), 1);
                        if (ret != 0) goto abort;
                    }
                    break;

                case ' ':       /* Non-trivial space.
*/ 4395 ret = MD_TEXT(ctx, text_type, " ", 1); 4396 if (ret != 0) goto abort; 4397 break; 4398 4399 case '`': /* Code span. */ 4400 if(mark.flags & MD_MARK_OPENER) { 4401 ret = MD_ENTER_SPAN(ctx, MD_SPAN_CODE, null); 4402 if (ret != 0) goto abort; 4403 text_type = MD_TEXT_CODE; 4404 } else { 4405 ret = MD_LEAVE_SPAN(ctx, MD_SPAN_CODE, null); 4406 if (ret != 0) goto abort; 4407 text_type = MD_TEXT_NORMAL; 4408 } 4409 break; 4410 4411 case '_': 4412 case '*': /* Emphasis, strong emphasis. */ 4413 if(mark.flags & MD_MARK_OPENER) { 4414 if((mark.end - off) % 2) { 4415 ret = MD_ENTER_SPAN(ctx, MD_SPAN_EM, null); 4416 if (ret != 0) goto abort; 4417 off++; 4418 } 4419 while(off + 1 < mark.end) { 4420 ret = MD_ENTER_SPAN(ctx, MD_SPAN_STRONG, null); 4421 if (ret != 0) goto abort; 4422 off += 2; 4423 } 4424 } else { 4425 while(off + 1 < mark.end) { 4426 ret = MD_LEAVE_SPAN(ctx, MD_SPAN_STRONG, null); 4427 if (ret != 0) goto abort; 4428 off += 2; 4429 } 4430 if((mark.end - off) % 2) { 4431 ret = MD_LEAVE_SPAN(ctx, MD_SPAN_EM, null); 4432 if (ret != 0) goto abort; 4433 off++; 4434 } 4435 } 4436 break; 4437 4438 case '~': 4439 if(mark.flags & MD_MARK_OPENER) 4440 { 4441 ret = MD_ENTER_SPAN(ctx, MD_SPAN_DEL, null); 4442 if (ret != 0) goto abort; 4443 } 4444 else 4445 { 4446 ret = MD_LEAVE_SPAN(ctx, MD_SPAN_DEL, null); 4447 if (ret != 0) goto abort; 4448 } 4449 break; 4450 4451 case '$': 4452 if(mark.flags & MD_MARK_OPENER) { 4453 ret = MD_ENTER_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null); 4454 if (ret != 0) goto abort; 4455 text_type = MD_TEXT_LATEXMATH; 4456 } else { 4457 ret = MD_LEAVE_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null); 4458 if (ret != 0) goto abort; 4459 text_type = MD_TEXT_NORMAL; 4460 } 4461 break; 4462 4463 case '[': /* Link, image. */ 4464 case '!': 4465 case ']': 4466 { 4467 const MD_MARK* opener = (mark.ch != ']' ? 
mark : &ctx.marks[mark.prev]); 4468 const MD_MARK* dest_mark = opener+1; 4469 const MD_MARK* title_mark = opener+2; 4470 4471 assert(dest_mark.ch == 'D'); 4472 assert(title_mark.ch == 'D'); 4473 4474 ret = (md_enter_leave_span_a(ctx, (mark.ch != ']') ? 1 : 0, 4475 (opener.ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), 4476 ctx.STR(dest_mark.beg), dest_mark.end - dest_mark.beg, FALSE, 4477 cast(char*) md_mark_get_ptr(ctx, cast(int)(title_mark - ctx.marks)), title_mark.prev)); 4478 if (ret < 0) goto abort; 4479 4480 /* link/image closer may span multiple lines. */ 4481 if(mark.ch == ']') { 4482 while(mark.end > line.end) 4483 line++; 4484 } 4485 4486 break; 4487 } 4488 4489 case '<': 4490 case '>': /* Autolink or raw HTML. */ 4491 if(!(mark.flags & MD_MARK_AUTOLINK)) { 4492 /* Raw HTML. */ 4493 if(mark.flags & MD_MARK_OPENER) 4494 text_type = MD_TEXT_HTML; 4495 else 4496 text_type = MD_TEXT_NORMAL; 4497 break; 4498 } 4499 /* Pass through, if auto-link. */ 4500 goto case '.'; 4501 4502 case '@': /* Permissive e-mail autolink. */ 4503 case ':': /* Permissive URL autolink. */ 4504 case '.': /* Permissive WWW autolink. */ 4505 { 4506 MD_MARK* opener = ((mark.flags & MD_MARK_OPENER) ? mark : &ctx.marks[mark.prev]); 4507 MD_MARK* closer = &ctx.marks[opener.next]; 4508 const(CHAR)* dest = ctx.STR(opener.end); 4509 SZ dest_size = closer.beg - opener.end; 4510 4511 /* For permissive auto-links we do not know closer mark 4512 * position at the time of md_collect_marks(), therefore 4513 * it can be out-of-order in ctx.marks[]. 4514 * 4515 * With this flag, we make sure that we output the closer 4516 * only if we processed the opener. */ 4517 if(mark.flags & MD_MARK_OPENER) 4518 closer.flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK; 4519 4520 if(opener.ch == '@' || opener.ch == '.') { 4521 dest_size += 7; 4522 ret = MD_TEMP_BUFFER(ctx, dest_size * CHAR.sizeof); 4523 if (ret < 0) goto abort; 4524 memcpy(ctx.buffer, 4525 (opener.ch == '@' ? 
"mailto:" : "http://").ptr, 4526 7 * CHAR.sizeof); 4527 memcpy(ctx.buffer + 7, dest, (dest_size-7) * CHAR.sizeof); 4528 dest = ctx.buffer; 4529 } 4530 4531 if(closer.flags & MD_MARK_VALIDPERMISSIVEAUTOLINK) 4532 { 4533 ret = (md_enter_leave_span_a(ctx, (mark.flags & MD_MARK_OPENER), 4534 MD_SPAN_A, dest, dest_size, TRUE, null, 0)); 4535 if (ret < 0) goto abort; 4536 } 4537 break; 4538 } 4539 4540 case '&': /* Entity. */ 4541 ret = MD_TEXT(ctx, MD_TEXT_ENTITY, ctx.STR(mark.beg), mark.end - mark.beg); 4542 if (ret != 0) goto abort; 4543 break; 4544 4545 case '\0': 4546 ret = MD_TEXT(ctx, MD_TEXT_NULLCHAR, "", 1); 4547 if (ret != 0) goto abort; 4548 break; 4549 4550 case 127: 4551 goto abort; 4552 4553 default: 4554 break; 4555 } 4556 4557 off = mark.end; 4558 4559 /* Move to next resolved mark. */ 4560 prev_mark = mark; 4561 mark++; 4562 while(!(mark.flags & MD_MARK_RESOLVED) || mark.beg < off) 4563 mark++; 4564 } 4565 4566 /* If reached end of line, move to next one. */ 4567 if(off >= line.end) { 4568 /* If it is the last line, we are done. */ 4569 if(off >= end) 4570 break; 4571 4572 if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) { 4573 OFF tmp2; 4574 4575 assert(prev_mark != null); 4576 assert(ISANYOF2_(prev_mark.ch, '`', '$') && (prev_mark.flags & MD_MARK_OPENER)); 4577 assert(ISANYOF2_(mark.ch, '`', '$') && (mark.flags & MD_MARK_CLOSER)); 4578 4579 /* Inside a code span, trailing line whitespace has to be 4580 * outputted. */ 4581 tmp2 = off; 4582 while(off < ctx.size && ctx.ISBLANK(off)) 4583 off++; 4584 if(off > tmp2) 4585 { 4586 ret = MD_TEXT(ctx, text_type, ctx.STR(tmp2), off-tmp2); 4587 if (ret != 0) goto abort; 4588 } 4589 4590 /* and new lines are transformed into single spaces. 
*/ 4591 if(prev_mark.end < off && off < mark.beg) 4592 { 4593 ret = MD_TEXT(ctx, text_type, " ", 1); 4594 if (ret != 0) goto abort; 4595 } 4596 } else if(text_type == MD_TEXT_HTML) { 4597 /* Inside raw HTML, we output the new line verbatim, including 4598 * any trailing spaces. */ 4599 OFF tmp2 = off; 4600 4601 while(tmp2 < end && ctx.ISBLANK(tmp2)) 4602 tmp2++; 4603 if(tmp2 > off) 4604 { 4605 ret = MD_TEXT(ctx, MD_TEXT_HTML, ctx.STR(off), tmp2 - off); 4606 if (ret != 0) goto abort; 4607 } 4608 ret = MD_TEXT(ctx, MD_TEXT_HTML, "\n", 1); 4609 if (ret != 0) goto abort; 4610 } else { 4611 /* Output soft or hard line break. */ 4612 MD_TEXTTYPE break_type = MD_TEXT_SOFTBR; 4613 4614 if(text_type == MD_TEXT_NORMAL) { 4615 if(enforce_hardbreak) 4616 break_type = MD_TEXT_BR; 4617 else if((ctx.CH(line.end) == ' ' && ctx.CH(line.end+1) == ' ')) 4618 break_type = MD_TEXT_BR; 4619 } 4620 4621 ret = MD_TEXT(ctx, break_type, "\n", 1); 4622 if (ret != 0) goto abort; 4623 } 4624 4625 /* Move to the next line. 
*/
            line++;
            off = line.beg;

            enforce_hardbreak = 0;
        }
    }

abort:
    return ret;
}


/***************************
 ***  Processing Tables  ***
 ***************************/

/* Derive the alignment of each of the n_align columns from the table
 * underline occupying [beg, end).
 *
 * A ':' directly before a run of dashes means left alignment, a ':' directly
 * after it means right alignment, both together mean centered. One entry is
 * written to align_[] per column.
 *
 * The scan never leaves [beg, end): if the underline holds fewer dash runs
 * than n_align, the remaining columns get MD_ALIGN_DEFAULT. */
void md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align_, int n_align)
{
    static immutable MD_ALIGN[] align_map =
    [
        MD_ALIGN_DEFAULT,
        MD_ALIGN_LEFT,
        MD_ALIGN_RIGHT,
        MD_ALIGN_CENTER
    ];
    OFF off = beg;

    while(n_align > 0) {
        int index = 0;  /* index into align_map[] */

        /* Find the next run of dashes, staying inside the underline. */
        while(off < end && ctx.CH(off) != '-')
            off++;
        if(off >= end) {
            /* Underline exhausted before all columns were described. */
            for(; n_align > 0; n_align--) {
                *align_ = MD_ALIGN_DEFAULT;
                align_++;
            }
            break;
        }

        if(off > beg && ctx.CH(off-1) == ':')
            index |= 1;
        while(off < end && ctx.CH(off) == '-')
            off++;
        if(off < end && ctx.CH(off) == ':')
            index |= 2;

        *align_ = align_map[index];
        align_++;
        n_align--;
    }

}

/* Emit one table cell (cell_type is MD_BLOCK_TH or MD_BLOCK_TD) whose
 * contents are the input range [beg, end) with surrounding whitespace
 * trimmed. The cell is reported through the enter-block callback, its inline
 * contents and the leave-block callback.
 *
 * Returns 0 on success, otherwise the failing step's return value. */
int md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align_, OFF beg, OFF end)
{
    MD_LINE line;
    MD_BLOCK_TD_DETAIL det;
    int ret = 0;

    while(beg < end && ctx.ISWHITESPACE(beg))
        beg++;
    while(end > beg && ctx.ISWHITESPACE(end-1))
        end--;

    det.align_ = align_;
    line.beg = beg;
    line.end = end;

    ret = MD_ENTER_BLOCK(ctx, cell_type, &det);
    if (ret != 0) goto abort;
    ret = (md_process_normal_block_contents(ctx, &line, 1));
    if (ret < 0) goto abort;
    ret = MD_LEAVE_BLOCK(ctx, cell_type, &det);
    if (ret != 0) goto abort;

abort:
    return ret;
}

/* Emit one table row occupying [beg, end) as an MD_BLOCK_TR block holding
 * exactly col_count cells of type cell_type.
 *
 * The row is split at the pipes recognized as cell boundaries. Surplus cells
 * are dropped; missing cells are emitted empty. align_[] must provide
 * col_count entries.
 *
 * Returns 0 on success, a negative value on an internal failure, or the
 * non-zero value returned by a callback. */
int md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
                         const MD_ALIGN* align_, int col_count)
{
    MD_LINE line;
    OFF* pipe_offs = null;
    int i, j, n;
    int ret = 0;

    line.beg = beg;
    line.end = end;

    /* Break the line into table cells by identifying pipe characters who
     * form the cell boundary. */
    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
    if (ret < 0) goto abort;

    /* We have to remember the cell boundaries in local buffer because
     * ctx.marks[] shall be reused during cell contents processing.
     *
     * Nothing is allocated when there is no boundary: malloc(0) may legally
     * return null, which must not be mistaken for an out-of-memory error. */
    n = ctx.n_table_cell_boundaries;
    if(n > 0) {
        pipe_offs = cast(OFF*) malloc(n * OFF.sizeof);
        if(pipe_offs == null) {
            ctx.MD_LOG("malloc() failed.");
            ret = -1;
            goto abort;
        }
        /* Never write past the buffer, even if the chain and the counter
         * should disagree. */
        for(i = ctx.TABLECELLBOUNDARIES.head, j = 0; i >= 0 && j < n; i = ctx.marks[i].next) {
            MD_MARK* mark = &ctx.marks[i];
            pipe_offs[j++] = mark.beg;
        }
        /* Only the entries actually filled in may be read below. */
        n = j;
    }

    /* Process cells. */
    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TR, null);
    if (ret != 0) goto abort;

    j = 0;
    if(n > 0) {
        /* Text before the first pipe. */
        if(beg < pipe_offs[0] && j < col_count)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], beg, pipe_offs[0]));
            if (ret < 0) goto abort;
        }
        /* Cells enclosed by two pipes. */
        for(i = 0; i < n-1 && j < col_count; i++)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[i]+1, pipe_offs[i+1]));
            if (ret < 0) goto abort;
        }
        /* Text after the last pipe. */
        if(pipe_offs[n-1] < end-1 && j < col_count)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[n-1]+1, end));
            if (ret < 0) goto abort;
        }
    } else if(beg < end && j < col_count) {
        /* Defensive: a row without any boundary forms a single cell. */
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], beg, end));
        if (ret < 0) goto abort;
    }
    /* Make sure we call enough table cells even if the current table contains
     * too few of them. */
    while(j < col_count)
    {
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], 0, 0));
        if (ret < 0) goto abort;
    }

    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TR, null);
    if (ret != 0) goto abort;

abort:
    free(pipe_offs);

    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}

/* Emit the contents of a table block: lines[0] is the header row, lines[1]
 * the underline (used only to determine column alignment) and every further
 * line a body row. The header row is wrapped in MD_BLOCK_THEAD, the body
 * rows in MD_BLOCK_TBODY.
 *
 * Returns 0 on success, a negative value on an internal failure, or the
 * non-zero value returned by a callback. */
int md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
{
    MD_ALIGN* align_ = null;
    int i;
    int ret = 0;

    /* At least two lines have to be present: The column headers and the line
     * with the underlines. */
    assert(n_lines >= 2);

    /* With no columns there is nothing to allocate; malloc(0) may legally
     * return null and must not be reported as a failure. */
    if(col_count > 0) {
        align_ = cast(MD_ALIGN*) malloc(col_count * MD_ALIGN.sizeof);
        if(align_ == null) {
            ctx.MD_LOG("malloc() failed.");
            ret = -1;
            goto abort;
        }

        md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align_, col_count);
    }

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_THEAD, null);
    if (ret != 0) goto abort;
    ret = (md_process_table_row(ctx, MD_BLOCK_TH,
                        lines[0].beg, lines[0].end, align_, col_count));
    if (ret < 0) goto abort;
    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_THEAD, null);
    if (ret != 0) goto abort;

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TBODY, null);
    if (ret != 0) goto abort;
    for(i = 2; i < n_lines; i++) {
        ret = (md_process_table_row(ctx, MD_BLOCK_TD,
                        lines[i].beg, lines[i].end, align_, col_count));
        if (ret < 0) goto abort;
    }
    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TBODY, null);
    if (ret != 0) goto abort;

abort:
    free(align_);
    return ret;
}

int md_is_table_row(MD_CTX* ctx, OFF beg, OFF* p_end)
{
    MD_LINE line;
    int i;
    int ret = FALSE;

    line.beg = beg;
    line.end = beg;

    /* Find end of line.
*/
    while(line.end < ctx.size && !ctx.ISNEWLINE(line.end))
        line.end++;

    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
    if (ret < 0) goto abort;

    if(ctx.TABLECELLBOUNDARIES.head >= 0) {
        if(p_end != null)
            *p_end = line.end;
        ret = TRUE;
    }

abort:
    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}


/**************************
 ***  Processing Block  ***
 **************************/

/* Bits stored in MD_BLOCK.flags. */
enum MD_BLOCK_CONTAINER_OPENER   = 0x01;
enum MD_BLOCK_CONTAINER_CLOSER   = 0x02;
enum MD_BLOCK_CONTAINER          = (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER);
enum MD_BLOCK_LOOSE_LIST         = 0x04;
enum MD_BLOCK_SETEXT_HEADER      = 0x08;

/* Compact block record (8 bytes, enforced by the static assert below).
 * Type, flags and data are kept in narrow fields and exposed through
 * properties, so each setter narrows its argument to the field's width. */
struct MD_BLOCK
{
nothrow:
@nogc:
    ubyte type_;
    ubyte flags_;
    ushort data_;

    MD_BLOCKTYPE type() const { return type_; }
    void type(MD_BLOCKTYPE value) { type_ = cast(ubyte)value; }

    uint flags() const { return flags_; }
    void flags(uint value) { flags_ = cast(ubyte)value; }

    /* MD_BLOCK_H:      Header level (1 - 6)
     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
     */
    uint data() const { return data_; }
    /* Narrow to the full 16 bits of data_. Casting to ubyte here would
     * silently drop the high byte, e.g. turning a column count of 256
     * into 0. */
    void data(uint value) { data_ = cast(ushort)value; }

    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
     * MD_BLOCK_LI:     Task mark offset in the input doc.
     * MD_BLOCK_OL:     Start item number.
     */
    uint n_lines;
}

static assert(MD_BLOCK.sizeof == 8);

/* Per-container state; the flag-like members are stored as bytes behind
 * uint properties. */
struct MD_CONTAINER
{
nothrow:
@nogc:

    CHAR ch;

    ubyte is_loose_;
    ubyte is_task_;

    uint is_loose() { return is_loose_; }
    void is_loose(uint value) { is_loose_ = cast(ubyte)value; }

    uint is_task() { return is_task_; }
    void is_task(uint value) { is_task_ = cast(ubyte)value; }

    uint start;
    uint mark_indent;
    uint contents_indent;
    OFF block_byte_off;
    OFF task_mark_off;
}


/* Analyze and then render the inline contents of a normal (non-verbatim)
 * block made of the given lines. Temporary memory attached to dummy marks is
 * released on every path, including failure.
 *
 * Returns a negative value if the analysis or the rendering fails, otherwise
 * the value of the last step. */
int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    int i;
    int ret;

    ret = (md_analyze_inlines(ctx, lines, n_lines, FALSE));
    if (ret < 0) goto abort;
    ret = (md_process_inlines(ctx, lines, n_lines));
    if (ret < 0) goto abort;

abort:
    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}

int md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
{
    static immutable string indent_chunk_str = " ";

    int i;
    int ret = 0;

    for(i = 0; i < n_lines; i++) {
        const MD_VERBATIMLINE* line = &lines[i];
        int indent = line.indent;

        assert(indent >= 0);

        /* Output code indentation. */
        while(indent > cast(int)(indent_chunk_str.length)) {
            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, cast(SZ)(indent_chunk_str.length));
            if (ret != 0) goto abort;
            indent -= indent_chunk_str.length;
        }
        if(indent > 0)
        {
            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, indent);
            if (ret != 0) goto abort;
        }

        /* Output the code line itself.
*/
        ret = MD_TEXT_INSECURE(ctx, text_type, ctx.STR(line.beg), line.end - line.beg);
        if (ret != 0) goto abort;

        /* Enforce end-of-line. */
        ret = MD_TEXT(ctx, text_type, "\n", 1);
        if (ret != 0) goto abort;
    }

abort:
    return ret;
}

/* Emit the text of a code block.
 *
 * For a fenced block the first line is the opening fence and is skipped
 * (only the opening fence is stored, due to logic in md_analyze_line()).
 * For an indented block, blank lines at the start and at the end are dropped.
 *
 * Returns 0 if nothing is left to emit, otherwise the result of
 * md_process_verbatim_block_contents(). */
static int
md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const(MD_VERBATIMLINE)* lines, int n_lines)
{
    const(MD_VERBATIMLINE)* first_line = lines;
    int remaining = n_lines;

    if(is_fenced) {
        /* Step over the fence line. */
        first_line++;
        remaining--;
    } else {
        /* Strip leading blank lines ... */
        while(remaining > 0 && first_line[0].beg == first_line[0].end) {
            first_line++;
            remaining--;
        }
        /* ... and trailing ones. */
        while(remaining > 0 && first_line[remaining-1].beg == first_line[remaining-1].end)
            remaining--;
    }

    if(remaining == 0)
        return 0;

    return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, first_line, remaining);
}

/* Fill *det for a fenced code block from its opening fence line, which is
 * stored right behind the block record.
 *
 * det.info is built from the whole info string (the text after the fence,
 * with surrounding spaces trimmed), det.lang from its first
 * whitespace-delimited word, and det.fence_char is the fence character.
 * Both builds are handed back through info_build / lang_build.
 *
 * Returns a negative value if building an attribute fails; in that case
 * det.fence_char is left untouched. */
int md_setup_fenced_code_detail(MD_CTX* ctx, const(MD_BLOCK)* block, MD_BLOCK_CODE_DETAIL* det,
                                MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
{
    const(MD_VERBATIMLINE)* fence_line = cast(const(MD_VERBATIMLINE)*)(block + 1);
    const CHAR fence_ch = ctx.CH(fence_line.beg);
    OFF info_beg = fence_line.beg;
    OFF info_end = fence_line.end;
    int ret = 0;

    /* Step over the fence characters and the spaces following them. */
    for(; info_beg < ctx.size && ctx.CH(info_beg) == fence_ch; info_beg++) {}
    for(; info_beg < ctx.size && ctx.CH(info_beg) == ' '; info_beg++) {}

    /* Drop trailing spaces. */
    for(; info_end > info_beg && ctx.CH(info_end-1) == ' '; info_end--) {}

    /* Build the info string attribute. */
    ret = md_build_attribute(ctx, ctx.STR(info_beg), info_end - info_beg, 0, &det.info, info_build);
    if(ret < 0)
        return ret;

    /* Build the language attribute: the first word of the info string. */
    OFF lang_end = info_beg;
    for(; lang_end < info_end && !ctx.ISWHITESPACE(lang_end); lang_end++) {}
    ret = md_build_attribute(ctx, ctx.STR(info_beg), lang_end - info_beg, 0, &det.lang, lang_build);
    if(ret < 0)
        return ret;

    det.fence_char = fence_ch;

    return ret;
}

static int
md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
{
    static union HeaderOrCode
    {
        MD_BLOCK_H_DETAIL header;
        MD_BLOCK_CODE_DETAIL code;
    }
    HeaderOrCode det;
    MD_ATTRIBUTE_BUILD info_build;
    MD_ATTRIBUTE_BUILD lang_build;
    int is_in_tight_list;
    int clean_fence_code_detail = FALSE;
    int ret = 0;

    memset(&det, 0, det.sizeof);

    if(ctx.n_containers == 0)
        is_in_tight_list = FALSE;
    else
        is_in_tight_list = !ctx.containers[ctx.n_containers-1].is_loose;

    switch(block.type)
    {
        case MD_BLOCK_H:
            det.header.level = block.data;
            break;

        case MD_BLOCK_CODE:
            /* For fenced code block, we may need to set the info string. */
            if(block.data != 0) {
                memset(&det.code, 0, MD_BLOCK_CODE_DETAIL.sizeof);
                clean_fence_code_detail = TRUE;
                ret = (md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
                if (ret < 0) goto abort;
            }
            break;

        default:
            /* Noop. */
            break;
    }

    if(!is_in_tight_list || block.type != MD_BLOCK_P)
    {
        ret = MD_ENTER_BLOCK(ctx, block.type, cast(void*) &det);
        if (ret != 0) goto abort;
    }

    /* Process the block contents accordingly to is type.
*/ 5082 switch(block.type) { 5083 case MD_BLOCK_HR: 5084 /* noop */ 5085 break; 5086 5087 case MD_BLOCK_CODE: 5088 ret = (md_process_code_block_contents(ctx, (block.data != 0), 5089 cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); 5090 if (ret < 0) goto abort; 5091 break; 5092 5093 case MD_BLOCK_HTML: 5094 ret = (md_process_verbatim_block_contents(ctx, MD_TEXT_HTML, 5095 cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); 5096 if (ret < 0) goto abort; 5097 break; 5098 5099 case MD_BLOCK_TABLE: 5100 ret = (md_process_table_block_contents(ctx, block.data, 5101 cast(const(MD_LINE)*)(block + 1), block.n_lines)); 5102 if (ret < 0) goto abort; 5103 break; 5104 5105 default: 5106 ret = (md_process_normal_block_contents(ctx, 5107 cast(const(MD_LINE)*)(block + 1), block.n_lines)); 5108 if (ret < 0) goto abort; 5109 break; 5110 } 5111 5112 if(!is_in_tight_list || block.type != MD_BLOCK_P) 5113 { 5114 ret = MD_LEAVE_BLOCK(ctx, block.type, cast(void*) &det); 5115 if (ret != 0) goto abort; 5116 } 5117 5118 abort: 5119 if(clean_fence_code_detail) { 5120 md_free_attribute(ctx, &info_build); 5121 md_free_attribute(ctx, &lang_build); 5122 } 5123 return ret; 5124 } 5125 5126 int md_process_all_blocks(MD_CTX* ctx) 5127 { 5128 int byte_off = 0; 5129 int ret = 0; 5130 5131 /* ctx.containers now is not needed for detection of lists and list items 5132 * so we reuse it for tracking what lists are loose or tight. We rely 5133 * on the fact the vector is large enough to hold the deepest nesting 5134 * level of lists. */ 5135 ctx.n_containers = 0; 5136 5137 while(byte_off < ctx.n_block_bytes) { 5138 MD_BLOCK* block = cast(MD_BLOCK*)(cast(char*)ctx.block_bytes + byte_off); 5139 static union Det 5140 { 5141 MD_BLOCK_UL_DETAIL ul; 5142 MD_BLOCK_OL_DETAIL ol; 5143 MD_BLOCK_LI_DETAIL li; 5144 } 5145 5146 Det det; 5147 5148 switch(block.type) { 5149 case MD_BLOCK_UL: 5150 det.ul.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? 
FALSE : TRUE; 5151 det.ul.mark = cast(CHAR) block.data; 5152 break; 5153 5154 case MD_BLOCK_OL: 5155 det.ol.start = block.n_lines; 5156 det.ol.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; 5157 det.ol.mark_delimiter = cast(CHAR) block.data; 5158 break; 5159 5160 case MD_BLOCK_LI: 5161 det.li.is_task = (block.data != 0); 5162 det.li.task_mark = cast(CHAR) block.data; 5163 det.li.task_mark_offset = cast(OFF) block.n_lines; 5164 break; 5165 5166 default: 5167 /* noop */ 5168 break; 5169 } 5170 5171 if(block.flags & MD_BLOCK_CONTAINER) { 5172 if(block.flags & MD_BLOCK_CONTAINER_CLOSER) { 5173 ret = MD_LEAVE_BLOCK(ctx, block.type, &det); 5174 if (ret != 0) goto abort; 5175 5176 if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL || block.type == MD_BLOCK_QUOTE) 5177 ctx.n_containers--; 5178 } 5179 5180 if(block.flags & MD_BLOCK_CONTAINER_OPENER) { 5181 ret = MD_ENTER_BLOCK(ctx, block.type, &det); 5182 if (ret != 0) goto abort; 5183 5184 if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL) { 5185 ctx.containers[ctx.n_containers].is_loose = (block.flags & MD_BLOCK_LOOSE_LIST); 5186 ctx.n_containers++; 5187 } else if(block.type == MD_BLOCK_QUOTE) { 5188 /* This causes that any text in a block quote, even if 5189 * nested inside a tight list item, is wrapped with 5190 * <p>...</p>. 
/* Reserve n_bytes at the end of the ctx.block_bytes buffer, growing the
 * buffer when needed, and return a pointer to the reserved area.
 *
 * Growing may move the buffer; ctx.current_block (which points into it) is
 * re-based accordingly.
 *
 * Returns null when the reallocation fails. In that case ctx is left
 * untouched (buffer, capacity and current_block all keep their old values).
 */
static void*
md_push_block_bytes(MD_CTX* ctx, int n_bytes)
{
    void* ptr;

    if(ctx.n_block_bytes + n_bytes > ctx.alloc_block_bytes) {
        void* new_block_bytes;
        auto new_alloc = ctx.alloc_block_bytes;
        ptrdiff_t off_current_block = 0;

        /* Keep doubling until the request really fits; a single doubling is
         * not guaranteed to be enough for an arbitrary n_bytes. */
        do {
            new_alloc = (new_alloc > 0 ? new_alloc * 2 : 512);
        } while(ctx.n_block_bytes + n_bytes > new_alloc);

        /* Remember where .current_block sits inside the buffer *before* the
         * reallocation, while the old pointer is still valid. */
        if(ctx.current_block != null)
            off_current_block = cast(char*) ctx.current_block - cast(char*) ctx.block_bytes;

        new_block_bytes = realloc(ctx.block_bytes, new_alloc);
        if(new_block_bytes == null) {
            /* The old buffer is still valid and its recorded capacity has
             * not been changed, so ctx stays consistent. */
            ctx.MD_LOG("realloc() failed.");
            return null;
        }

        /* Fix the .current_block after the reallocation. */
        if(ctx.current_block != null)
            ctx.current_block = cast(MD_BLOCK*) (cast(char*) new_block_bytes + off_current_block);

        ctx.block_bytes = new_block_bytes;
        ctx.alloc_block_bytes = new_alloc;
    }

    ptr = cast(char*)ctx.block_bytes + ctx.n_block_bytes;
    ctx.n_block_bytes += n_bytes;
    return ptr;
}
/* Strip link reference definitions from the start of the current (textual)
 * block. The definitions themselves are handled by
 * md_is_link_reference_definition(), which reports how many lines each one
 * occupies.
 *
 * (Only the start of the block is examined, since a reference definition
 * cannot interrupt a paragraph.)
 *
 * If every line of the block is consumed, the block is dropped entirely and
 * ctx.current_block is reset to null.
 *
 * Returns 0 on success, -1 if md_is_link_reference_definition() reported
 * a failure (negative result).
 */
int md_consume_link_reference_definitions(MD_CTX* ctx)
{
    MD_LINE* block_lines = cast(MD_LINE*) (ctx.current_block + 1);
    int line_count = ctx.current_block.n_lines;
    int consumed = 0;

    /* Count the leading lines which form one or more reference
     * definitions. */
    for(;;) {
        if(consumed >= line_count)
            break;

        int def_lines = md_is_link_reference_definition(ctx,
                            block_lines + consumed, line_count - consumed);

        /* Negative result: the helper failed. */
        if(def_lines < 0)
            return -1;

        /* Zero: no (further) reference definition here. */
        if(def_lines == 0)
            break;

        consumed += def_lines;
    }

    /* Nothing to remove. */
    if(consumed == 0)
        return 0;

    /* The consumed line records are given back in either case below. */
    ctx.n_block_bytes -= consumed * MD_LINE.sizeof;

    if(consumed == line_count) {
        /* The whole block was made of definitions: drop its header too. */
        ctx.n_block_bytes -= MD_BLOCK.sizeof;
        ctx.current_block = null;
    } else {
        /* Shift the surviving lines to the front of the block. */
        memmove(block_lines, block_lines + consumed, (line_count - consumed) * MD_LINE.sizeof);
        ctx.current_block.n_lines -= consumed;
    }

    return 0;
}
/* Append the analyzed line to the block currently being built.
 *
 * Code and raw HTML blocks store MD_VERBATIMLINE records (which also keep
 * the line indentation); every other block stores plain MD_LINE records.
 *
 * Returns 0 on success, -1 if the record could not be allocated.
 */
static int
md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
{
    assert(ctx.current_block != null);

    const kind = ctx.current_block.type;
    const bool is_verbatim = (kind == MD_BLOCK_CODE || kind == MD_BLOCK_HTML);

    if(is_verbatim) {
        MD_VERBATIMLINE* vline = cast(MD_VERBATIMLINE*) md_push_block_bytes(ctx, MD_VERBATIMLINE.sizeof);
        if(vline == null)
            return -1;

        vline.indent = analysis.indent;
        vline.beg = analysis.beg;
        vline.end = analysis.end;
    } else {
        MD_LINE* tline = cast(MD_LINE*) md_push_block_bytes(ctx, MD_LINE.sizeof);
        if(tline == null)
            return -1;

        tline.beg = analysis.beg;
        tline.end = analysis.end;
    }

    /* md_push_block_bytes() keeps ctx.current_block valid even if the
     * buffer moved, so it is safe to touch it here. */
    ctx.current_block.n_lines++;
    return 0;
}
/* Check whether the line starting at 'beg' is a thematic break: at least
 * three occurrences of the character found at 'beg', optionally mixed with
 * spaces and tabs, and nothing else up to the end of the line.
 *
 * On success stores the offset just past the scanned characters into
 * *p_end and returns TRUE. On failure stores the offset where the scan
 * stopped into *p_killer and returns FALSE; *p_end is left untouched.
 */
static int
md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
{
    const mark = ctx.CH(beg);
    OFF pos;
    int mark_count = 1;     /* The character at 'beg' itself. */

    for(pos = beg + 1; pos < ctx.size; pos++) {
        const c = ctx.CH(pos);

        if(c == mark)
            mark_count++;
        else if(c != ' ' && c != '\t')
            break;
    }

    /* Too few marks, or something else is present on the line. */
    const bool rejected = (mark_count < 3) ||
                          (pos < ctx.size && !ctx.ISNEWLINE(pos));
    if(rejected) {
        *p_killer = pos;
        return FALSE;
    }

    *p_end = pos;
    return TRUE;
}
/* Check whether the line starting at 'beg' is a table underline, i.e.
 * a sequence of cells like "-----", ":----", "----:" or ":----:" separated
 * by pipes, with at least one pipe somewhere on the line.
 *
 * On success stores the end offset into *p_end and the number of cells
 * into *p_col_count and returns TRUE. Returns FALSE otherwise (outputs are
 * then left untouched).
 */
int md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, uint* p_col_count)
{
    OFF pos = beg;
    bool saw_pipe = false;
    uint columns = 0;

    /* Optional leading pipe, followed by optional whitespace. */
    if(pos < ctx.size && ctx.CH(pos) == '|') {
        saw_pipe = true;
        pos++;
        while(pos < ctx.size && ctx.ISWHITESPACE(pos))
            pos++;
    }

    for(;;) {
        const OFF cell_start = pos;

        /* One cell: optional ':', a run of '-', optional ':'. */
        if(pos < ctx.size && ctx.CH(pos) == ':')
            pos++;
        while(pos < ctx.size && ctx.CH(pos) == '-')
            pos++;
        if(pos < ctx.size && ctx.CH(pos) == ':')
            pos++;

        /* The cell underline has to be at least three characters long. */
        if(pos - cell_start < 3)
            return FALSE;

        columns++;

        /* Whitespace, then the pipe delimiter (optional at end of line). */
        while(pos < ctx.size && ctx.ISWHITESPACE(pos))
            pos++;

        const bool has_delimiter = (pos < ctx.size && ctx.CH(pos) == '|');
        if(has_delimiter) {
            saw_pipe = true;
            pos++;
            while(pos < ctx.size && ctx.ISWHITESPACE(pos))
                pos++;
        }

        /* Reaching the end of the line means success. */
        if(pos >= ctx.size || ctx.ISNEWLINE(pos))
            break;

        /* More text follows, so the cells must have been delimited. */
        if(!has_delimiter)
            return FALSE;
    }

    if(!saw_pipe)
        return FALSE;

    *p_end = pos;
    *p_col_count = columns;
    return TRUE;
}
/* Check whether the line starting at 'beg' closes the currently open code
 * fence: a run of 'ch' at least ctx.code_fence_length long, optionally
 * followed by spaces, and then the end of the line.
 *
 * *p_end is set even on failure (to the offset where scanning stopped):
 * a line which is not the closing fence is eaten by the caller anyway,
 * without any further parsing.
 */
static int
md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
{
    OFF pos = beg;
    bool closes = false;

    /* The fence itself: same character as the opening one ... */
    while(pos < ctx.size && ctx.CH(pos) == ch)
        pos++;

    /* ... and at least as long as the opening one. */
    if(pos - beg >= ctx.code_fence_length) {
        /* Optional trailing spaces. */
        while(pos < ctx.size && ctx.CH(pos) == ' ')
            pos++;

        /* Nothing more is allowed on the line. */
        closes = (pos >= ctx.size || ctx.ISNEWLINE(pos));
    }

    *p_end = pos;
    return closes ? TRUE : FALSE;
}
*/ 5648 5649 static immutable string Xend = null; 5650 static immutable string[] t1 = [ "script", "pre", "style", Xend ]; 5651 static immutable string[] a6 = [ "address", "article", "aside", Xend ]; 5652 static immutable string[] b6 = [ "base", "basefont", "blockquote", "body", Xend ]; 5653 static immutable string[] c6 = [ "caption", "center", "col", "colgroup", Xend ]; 5654 static immutable string[] d6 = [ "dd", "details", "dialog", "dir", 5655 "div", "dl", "dt", Xend ]; 5656 static immutable string[] f6 = [ "fieldset", "figcaption", "figure", "footer", 5657 "form", "frame", "frameset", Xend ]; 5658 static immutable string[] h6 = [ "h1", "head", "header", "hr", "html", Xend ]; 5659 static immutable string[] i6 = [ "iframe", Xend ]; 5660 static immutable string[] l6 = [ "legend", "li", "link", Xend ]; 5661 static immutable string[] m6 = [ "main", "menu", "menuitem", Xend ]; 5662 static immutable string[] n6 = [ "nav", "noframes", Xend ]; 5663 static immutable string[] o6 = [ "ol", "optgroup", "option", Xend ]; 5664 static immutable string[] p6 = [ "p", "param", Xend ]; 5665 static immutable string[] s6 = [ "section", "source", "summary", Xend ]; 5666 static immutable string[] t6 = [ "table", "tbody", "td", "tfoot", "th", 5667 "thead", "title", "tr", "track", Xend ]; 5668 static immutable string[] u6 = [ "ul", Xend ]; 5669 static immutable string[] xx = [ Xend ]; 5670 5671 immutable(string)*[26] map6; 5672 map6[0] = a6.ptr; 5673 map6[1] = b6.ptr; 5674 map6[2] = c6.ptr; 5675 map6[3] = d6.ptr; 5676 map6[4] = xx.ptr; 5677 map6[5] = f6.ptr; 5678 map6[6] = xx.ptr; 5679 map6[7] = h6.ptr; 5680 map6[8] = i6.ptr; 5681 map6[9] = xx.ptr; 5682 map6[10] = xx.ptr; 5683 map6[11] = l6.ptr; 5684 map6[12] = m6.ptr; 5685 map6[13] = n6.ptr; 5686 map6[14] = o6.ptr; 5687 map6[15] = p6.ptr; 5688 map6[16] = xx.ptr; 5689 map6[17] = xx.ptr; 5690 map6[18] = s6.ptr; 5691 map6[19] = t6.ptr; 5692 map6[20] = u6.ptr; 5693 map6[21] = xx.ptr; 5694 map6[22] = xx.ptr; 5695 map6[23] = xx.ptr; 5696 
map6[24] = xx.ptr; 5697 map6[25] = xx.ptr; 5698 5699 OFF off = beg + 1; 5700 int i; 5701 5702 /* Check for type 1: <script, <pre, or <style */ 5703 for(i = 0; t1[i].ptr != null; i++) 5704 { 5705 if(off + t1[i].length <= ctx.size) 5706 { 5707 if(md_ascii_case_eq(ctx.STR(off), t1[i].ptr, cast(uint)(t1[i].length))) 5708 return 1; 5709 } 5710 } 5711 5712 /* Check for type 2: <!-- */ 5713 if(off + 3 < ctx.size && ctx.CH(off) == '!' && ctx.CH(off+1) == '-' && ctx.CH(off+2) == '-') 5714 return 2; 5715 5716 /* Check for type 3: <? */ 5717 if(off < ctx.size && ctx.CH(off) == '?') 5718 return 3; 5719 5720 /* Check for type 4 or 5: <! */ 5721 if(off < ctx.size && ctx.CH(off) == '!') { 5722 /* Check for type 4: <! followed by uppercase letter. */ 5723 if(off + 1 < ctx.size && ctx.ISUPPER(off+1)) 5724 return 4; 5725 5726 /* Check for type 5: <![CDATA[ */ 5727 if(off + 8 < ctx.size) { 5728 if(md_ascii_eq(ctx.STR(off), "![CDATA[", 8 * CHAR.sizeof)) 5729 return 5; 5730 } 5731 } 5732 5733 /* Check for type 6: Many possible starting tags listed above. */ 5734 if(off + 1 < ctx.size && (ctx.ISALPHA(off) || (ctx.CH(off) == '/' && ctx.ISALPHA(off+1)))) { 5735 int slot; 5736 const(string)* tags; 5737 5738 if(ctx.CH(off) == '/') 5739 off++; 5740 5741 slot = (ctx.ISUPPER(off) ? ctx.CH(off) - 'A' : ctx.CH(off) - 'a'); 5742 tags = map6[slot]; 5743 5744 for(i = 0; tags[i].ptr != null; i++) { 5745 if(off + tags[i].length <= ctx.size) { 5746 if(md_ascii_case_eq(ctx.STR(off), tags[i].ptr, cast(uint) tags[i].length)) { 5747 OFF tmp = cast(uint)(off + tags[i].length); 5748 if(tmp >= ctx.size) 5749 return 6; 5750 if(ctx.ISBLANK(tmp) || ctx.ISNEWLINE(tmp) || ctx.CH(tmp) == '>') 5751 return 6; 5752 if(tmp+1 < ctx.size && ctx.CH(tmp) == '/' && ctx.CH(tmp+1) == '>') 5753 return 6; 5754 break; 5755 } 5756 } 5757 } 5758 } 5759 5760 /* Check for type 7: any COMPLETE other opening or closing tag. 
/* Case sensitive check whether there is a substring 'what' (of length
 * 'what_len') between 'beg' and the end of the line.
 *
 * On success stores the offset just past the match into *p_end and returns
 * TRUE. Otherwise stores the offset where scanning stopped (a new line
 * character, or the point where 'what' no longer fits into the input) and
 * returns FALSE.
 */
static int
md_line_contains(MD_CTX* ctx, OFF beg, const(CHAR)* what, SZ what_len, OFF* p_end)
{
    OFF i;

    /* 'i + what_len <= ctx.size' (not '<'): a candidate ending exactly at
     * the end of the input is still fully inside the buffer and has to be
     * compared, otherwise e.g. "-->" as the very last characters of the
     * document is never found. The extra 'i < ctx.size' keeps ISNEWLINE(i)
     * in bounds even for a zero-length 'what'. */
    for(i = beg; i < ctx.size && i + what_len <= ctx.size; i++) {
        if(ctx.ISNEWLINE(i))
            break;
        if(memcmp(ctx.STR(i), what, what_len * CHAR.sizeof) == 0) {
            *p_end = i + what_len;
            return TRUE;
        }
    }

    *p_end = i;
    return FALSE;
}
/* Emit container-opener records for the last 'n_children' containers in
 * ctx.containers.
 *
 * For a list container, an opener for the list (UL or OL) and one for its
 * first item (LI) are pushed; for a block quote a single QUOTE opener.
 *
 * The 'data' parameter is kept only for compatibility with existing
 * callers. The list opener stores the container's own mark character
 * (c.ch) as its data, the same way md_leave_child_containers() does for the
 * closer, so that each nested list gets its own mark even when several
 * containers are opened by a single line.
 *
 * Returns 0 on success, negative value on failure.
 */
static int
md_enter_child_containers(MD_CTX* ctx, int n_children, uint data)
{
    int i;
    int ret = 0;

    for(i = ctx.n_containers - n_children; i < ctx.n_containers; i++) {
        MD_CONTAINER* c = &ctx.containers[i];
        int is_ordered_list = FALSE;

        switch(c.ch) {
            case ')':
            case '.':
                is_ordered_list = TRUE;
                /* Pass through */
                goto case '-';

            case '-':
            case '+':
            case '*':
                /* Remember offset in ctx.block_bytes so we can revisit the
                 * block if we detect it is a loose list. The current block
                 * has to be ended first so that the offset points at the
                 * list opener; a failure here must not be ignored. */
                ret = (md_end_current_block(ctx));
                if (ret < 0) goto abort;
                c.block_byte_off = ctx.n_block_bytes;

                ret = (md_push_container_bytes(ctx,
                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
                                c.start, c.ch, MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                                c.task_mark_off,
                                (c.is_task ? ctx.CH(c.task_mark_off) : 0),
                                MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                break;

            case '>':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                break;

            default:
                assert(false);
        }
    }

abort:
    return ret;
}
/* Check whether a container mark (block quote '>', bullet list '-', '+',
 * '*', or ordered list "<digits>." / "<digits>)") starts at offset 'beg'.
 *
 * 'indent' is the indentation of the mark; marks indented by
 * ctx.code_indent_offset or more are never recognized.
 *
 * On success fills ch, is_loose, is_task, mark_indent and contents_indent
 * of *p_container (and 'start' for ordered lists), stores the offset just
 * past the mark into *p_end and returns TRUE. Returns FALSE otherwise; note
 * p_container.start may have been modified even then.
 */
static int
md_is_container_mark(MD_CTX* ctx, uint indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
{
    OFF off = beg;
    OFF max_end;

    if(indent >= ctx.code_indent_offset)
        return FALSE;

    /* Check for block quote mark. */
    if(off < ctx.size && ctx.CH(off) == '>') {
        off++;
        p_container.ch = '>';
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off;
        return TRUE;
    }

    /* Check for list item bullet mark. It has to be followed by a blank
     * or by the end of the line. */
    if(off+1 < ctx.size && ctx.ISANYOF(off, "-+*") && (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1))) {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off + 1;
        return TRUE;
    }

    /* Check for ordered list item marks: up to 9 digits ... */
    max_end = off + 9;
    if(max_end > ctx.size)
        max_end = ctx.size;
    p_container.start = 0;
    while(off < max_end && ctx.ISDIGIT(off)) {
        p_container.start = p_container.start * 10 + ctx.CH(off) - '0';
        off++;
    }

    /* ... followed by '.' or ')' and then a blank or the end of the line.
     * 'off > beg' makes sure there is at least one digit: a bare ". foo"
     * or ") foo" is not an ordered list item. */
    if(off > beg && off+1 < ctx.size &&
       (ctx.CH(off) == '.' || ctx.CH(off) == ')') &&
       (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1)))
    {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + off - beg + 1;
        *p_end = off + 1;
        return TRUE;
    }

    return FALSE;
}
*/ 6086 off++; 6087 total_indent++; 6088 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6089 total_indent += line.indent; 6090 6091 /* The optional 1st space after '>' is part of the block quote mark. */ 6092 if(line.indent > 0) 6093 line.indent--; 6094 6095 line.beg = off; 6096 } else if(c.ch != '>' && line.indent >= c.contents_indent) { 6097 /* List. */ 6098 line.indent -= c.contents_indent; 6099 } else { 6100 break; 6101 } 6102 6103 n_parents++; 6104 } 6105 6106 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6107 /* Blank line does not need any real indentation to be nested inside 6108 * a list. */ 6109 if(n_brothers + n_children == 0) { 6110 while(n_parents < ctx.n_containers && ctx.containers[n_parents].ch != '>') 6111 n_parents++; 6112 } 6113 } 6114 6115 while(TRUE) { 6116 /* Check whether we are fenced code continuation. */ 6117 if(pivot_line.type == MD_LINE_FENCEDCODE) { 6118 line.beg = off; 6119 6120 /* We are another MD_LINE_FENCEDCODE unless we are closing fence 6121 * which we transform into MD_LINE_BLANK. */ 6122 if(line.indent < ctx.code_indent_offset) { 6123 if(md_is_closing_code_fence(ctx, ctx.CH(pivot_line.beg), off, &off)) { 6124 line.type = MD_LINE_BLANK; 6125 ctx.last_line_has_list_loosening_effect = FALSE; 6126 break; 6127 } 6128 } 6129 6130 /* Change indentation accordingly to the initial code fence. */ 6131 if(n_parents == ctx.n_containers) { 6132 if(line.indent > pivot_line.indent) 6133 line.indent -= pivot_line.indent; 6134 else 6135 line.indent = 0; 6136 6137 line.type = MD_LINE_FENCEDCODE; 6138 break; 6139 } 6140 } 6141 6142 /* Check whether we are HTML block continuation. */ 6143 if(pivot_line.type == MD_LINE_HTML && ctx.html_block_type > 0) { 6144 int html_block_type; 6145 6146 html_block_type = md_is_html_block_end_condition(ctx, off, &off); 6147 if(html_block_type > 0) { 6148 assert(html_block_type == ctx.html_block_type); 6149 6150 /* Make sure this is the last line of the block. 
*/ 6151 ctx.html_block_type = 0; 6152 6153 /* Some end conditions serve as blank lines at the same time. */ 6154 if(html_block_type == 6 || html_block_type == 7) { 6155 line.type = MD_LINE_BLANK; 6156 line.indent = 0; 6157 break; 6158 } 6159 } 6160 6161 if(n_parents == ctx.n_containers) { 6162 line.type = MD_LINE_HTML; 6163 break; 6164 } 6165 } 6166 6167 /* Check for blank line. */ 6168 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6169 if(pivot_line.type == MD_LINE_INDENTEDCODE && n_parents == ctx.n_containers) { 6170 line.type = MD_LINE_INDENTEDCODE; 6171 if(line.indent > ctx.code_indent_offset) 6172 line.indent -= ctx.code_indent_offset; 6173 else 6174 line.indent = 0; 6175 ctx.last_line_has_list_loosening_effect = FALSE; 6176 } else { 6177 line.type = MD_LINE_BLANK; 6178 ctx.last_line_has_list_loosening_effect = (n_parents > 0 && 6179 n_brothers + n_children == 0 && 6180 ctx.containers[n_parents-1].ch != '>'); 6181 6182 /* See https://github.com/mity/md4c/issues/6 6183 * 6184 * This ugly checking tests we are in (yet empty) list item but not 6185 * its very first line (with the list item mark). 6186 * 6187 * If we are such blank line, then any following non-blank line 6188 * which would be part of this list item actually ends the list 6189 * because "a list item can begin with at most one blank line." 6190 */ 6191 if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' && 6192 n_brothers + n_children == 0 && ctx.current_block == null && 6193 ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof) 6194 { 6195 MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof); 6196 if(top_block.type == MD_BLOCK_LI) 6197 ctx.last_list_item_starts_with_two_blank_lines = TRUE; 6198 } 6199 } 6200 break; 6201 } else { 6202 /* This is 2nd half of the hack. If the flag is set (that is there 6203 * were 2nd blank line at the start of the list item) and we would also 6204 * belonging to such list item, then interrupt the list. 
*/ 6205 ctx.last_line_has_list_loosening_effect = FALSE; 6206 if(ctx.last_list_item_starts_with_two_blank_lines) { 6207 if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' && 6208 n_brothers + n_children == 0 && ctx.current_block == null && 6209 ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof) 6210 { 6211 MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof); 6212 if(top_block.type == MD_BLOCK_LI) 6213 n_parents--; 6214 } 6215 6216 ctx.last_list_item_starts_with_two_blank_lines = FALSE; 6217 } 6218 } 6219 6220 /* Check whether we are Setext underline. */ 6221 if(line.indent < ctx.code_indent_offset && pivot_line.type == MD_LINE_TEXT 6222 && (ctx.CH(off) == '=' || ctx.CH(off) == '-') 6223 && (n_parents == ctx.n_containers)) 6224 { 6225 uint level; 6226 6227 if(md_is_setext_underline(ctx, off, &off, &level)) { 6228 line.type = MD_LINE_SETEXTUNDERLINE; 6229 line.data = level; 6230 break; 6231 } 6232 } 6233 6234 /* Check for thematic break line. */ 6235 if(line.indent < ctx.code_indent_offset && ctx.ISANYOF(off, "-_*") && off >= hr_killer) { 6236 if(md_is_hr_line(ctx, off, &off, &hr_killer)) { 6237 line.type = MD_LINE_HR; 6238 break; 6239 } 6240 } 6241 6242 /* Check for "brother" container. I.e. whether we are another list item 6243 * in already started list. */ 6244 if(n_parents < ctx.n_containers && n_brothers + n_children == 0) { 6245 OFF tmp; 6246 6247 if(md_is_container_mark(ctx, line.indent, off, &tmp, &container) && 6248 md_is_container_compatible(&ctx.containers[n_parents], &container)) 6249 { 6250 pivot_line = &md_dummy_blank_line; 6251 6252 off = tmp; 6253 6254 total_indent += container.contents_indent - container.mark_indent; 6255 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6256 total_indent += line.indent; 6257 line.beg = off; 6258 6259 /* Some of the following whitespace actually still belongs to the mark. 
*/ 6260 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6261 container.contents_indent++; 6262 } else if(line.indent <= ctx.code_indent_offset) { 6263 container.contents_indent += line.indent; 6264 line.indent = 0; 6265 } else { 6266 container.contents_indent += 1; 6267 line.indent--; 6268 } 6269 6270 ctx.containers[n_parents].mark_indent = container.mark_indent; 6271 ctx.containers[n_parents].contents_indent = container.contents_indent; 6272 6273 n_brothers++; 6274 continue; 6275 } 6276 } 6277 6278 /* Check for indented code. 6279 * Note indented code block cannot interrupt a paragraph. */ 6280 if(line.indent >= ctx.code_indent_offset && 6281 (pivot_line.type == MD_LINE_BLANK || pivot_line.type == MD_LINE_INDENTEDCODE)) 6282 { 6283 line.type = MD_LINE_INDENTEDCODE; 6284 assert(line.indent >= ctx.code_indent_offset); 6285 line.indent -= ctx.code_indent_offset; 6286 line.data = 0; 6287 break; 6288 } 6289 6290 /* Check for start of a new container block. */ 6291 if(line.indent < ctx.code_indent_offset && 6292 md_is_container_mark(ctx, line.indent, off, &off, &container)) 6293 { 6294 if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers && 6295 (off >= ctx.size || ctx.ISNEWLINE(off)) && container.ch != '>') 6296 { 6297 /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */ 6298 } else if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers && 6299 (container.ch == '.' || container.ch == ')') && container.start != 1) 6300 { 6301 /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */ 6302 } else { 6303 total_indent += container.contents_indent - container.mark_indent; 6304 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6305 total_indent += line.indent; 6306 6307 line.beg = off; 6308 line.data = container.ch; 6309 6310 /* Some of the following whitespace actually still belongs to the mark. 
*/ 6311 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6312 container.contents_indent++; 6313 } else if(line.indent <= ctx.code_indent_offset) { 6314 container.contents_indent += line.indent; 6315 line.indent = 0; 6316 } else { 6317 container.contents_indent += 1; 6318 line.indent--; 6319 } 6320 6321 if(n_brothers + n_children == 0) 6322 pivot_line = &md_dummy_blank_line; 6323 6324 if(n_children == 0) 6325 { 6326 ret = (md_leave_child_containers(ctx, n_parents + n_brothers)); 6327 if (ret < 0) goto abort; 6328 } 6329 6330 n_children++; 6331 ret = (md_push_container(ctx, &container)); 6332 if (ret < 0) goto abort; 6333 continue; 6334 } 6335 } 6336 6337 /* Check whether we are table continuation. */ 6338 if(pivot_line.type == MD_LINE_TABLE && md_is_table_row(ctx, off, &off) && 6339 n_parents == ctx.n_containers) 6340 { 6341 line.type = MD_LINE_TABLE; 6342 break; 6343 } 6344 6345 /* Check for ATX header. */ 6346 if(line.indent < ctx.code_indent_offset && ctx.CH(off) == '#') { 6347 uint level; 6348 6349 if(md_is_atxheader_line(ctx, off, &line.beg, &off, &level)) { 6350 line.type = MD_LINE_ATXHEADER; 6351 line.data = level; 6352 break; 6353 } 6354 } 6355 6356 /* Check whether we are starting code fence. */ 6357 if(ctx.CH(off) == '`' || ctx.CH(off) == '~') { 6358 if(md_is_opening_code_fence(ctx, off, &off)) { 6359 line.type = MD_LINE_FENCEDCODE; 6360 line.data = 1; 6361 break; 6362 } 6363 } 6364 6365 /* Check for start of raw HTML block. */ 6366 if(ctx.CH(off) == '<' && !(ctx.parser.flags & MD_FLAG_NOHTMLBLOCKS)) 6367 { 6368 ctx.html_block_type = md_is_html_block_start_condition(ctx, off); 6369 6370 /* HTML block type 7 cannot interrupt paragraph. */ 6371 if(ctx.html_block_type == 7 && pivot_line.type == MD_LINE_TEXT) 6372 ctx.html_block_type = 0; 6373 6374 if(ctx.html_block_type > 0) { 6375 /* The line itself also may immediately close the block. 
*/ 6376 if(md_is_html_block_end_condition(ctx, off, &off) == ctx.html_block_type) { 6377 /* Make sure this is the last line of the block. */ 6378 ctx.html_block_type = 0; 6379 } 6380 6381 line.type = MD_LINE_HTML; 6382 break; 6383 } 6384 } 6385 6386 /* Check for table underline. */ 6387 if((ctx.parser.flags & MD_FLAG_TABLES) && pivot_line.type == MD_LINE_TEXT && 6388 (ctx.CH(off) == '|' || ctx.CH(off) == '-' || ctx.CH(off) == ':') && 6389 n_parents == ctx.n_containers) 6390 { 6391 uint col_count; 6392 6393 if(ctx.current_block != null && ctx.current_block.n_lines == 1 && 6394 md_is_table_underline(ctx, off, &off, &col_count) && 6395 md_is_table_row(ctx, pivot_line.beg, null)) 6396 { 6397 line.data = col_count; 6398 line.type = MD_LINE_TABLEUNDERLINE; 6399 break; 6400 } 6401 } 6402 6403 /* By default, we are normal text line. */ 6404 line.type = MD_LINE_TEXT; 6405 if(pivot_line.type == MD_LINE_TEXT && n_brothers + n_children == 0) { 6406 /* Lazy continuation. */ 6407 n_parents = ctx.n_containers; 6408 } 6409 6410 /* Check for task mark. */ 6411 if((ctx.parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 && 6412 ISANYOF_(ctx.containers[ctx.n_containers-1].ch, "-+*.)")) 6413 { 6414 OFF tmp = off; 6415 6416 while(tmp < ctx.size && tmp < off + 3 && ctx.ISBLANK(tmp)) 6417 tmp++; 6418 if(tmp + 2 < ctx.size && ctx.CH(tmp) == '[' && 6419 ctx.ISANYOF(tmp+1, "xX ") && ctx.CH(tmp+2) == ']' && 6420 (tmp + 3 == ctx.size || ctx.ISBLANK(tmp+3) || ctx.ISNEWLINE(tmp+3))) 6421 { 6422 MD_CONTAINER* task_container = (n_children > 0 ? &ctx.containers[ctx.n_containers-1] : &container); 6423 task_container.is_task = TRUE; 6424 task_container.task_mark_off = tmp + 1; 6425 off = tmp + 3; 6426 while(ctx.ISWHITESPACE(off)) 6427 off++; 6428 line.beg = off; 6429 } 6430 } 6431 6432 break; 6433 } 6434 6435 /* Scan for end of the line. 6436 * 6437 * Note this is quite a bottleneck of the parsing as we here iterate almost 6438 * over compete document. 
6439 */ 6440 { 6441 /* Optimization: Use some loop unrolling. */ 6442 while(off + 3 < ctx.size && !ctx.ISNEWLINE(off+0) && !ctx.ISNEWLINE(off+1) 6443 && !ctx.ISNEWLINE(off+2) && !ctx.ISNEWLINE(off+3)) 6444 off += 4; 6445 while(off < ctx.size && !ctx.ISNEWLINE(off)) 6446 off++; 6447 } 6448 6449 /* Set end of the line. */ 6450 line.end = off; 6451 6452 /* But for ATX header, we should exclude the optional trailing mark. */ 6453 if(line.type == MD_LINE_ATXHEADER) { 6454 OFF tmp = line.end; 6455 while(tmp > line.beg && ctx.CH(tmp-1) == ' ') 6456 tmp--; 6457 while(tmp > line.beg && ctx.CH(tmp-1) == '#') 6458 tmp--; 6459 if(tmp == line.beg || ctx.CH(tmp-1) == ' ' || (ctx.parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)) 6460 line.end = tmp; 6461 } 6462 6463 /* Trim trailing spaces. */ 6464 if(line.type != MD_LINE_INDENTEDCODE && line.type != MD_LINE_FENCEDCODE) { 6465 while(line.end > line.beg && ctx.CH(line.end-1) == ' ') 6466 line.end--; 6467 } 6468 6469 /* Eat also the new line. */ 6470 if(off < ctx.size && ctx.CH(off) == '\r') 6471 off++; 6472 if(off < ctx.size && ctx.CH(off) == '\n') 6473 off++; 6474 6475 *p_end = off; 6476 6477 /* If we belong to a list after seeing a blank line, the list is loose. */ 6478 if(prev_line_has_list_loosening_effect && line.type != MD_LINE_BLANK && n_parents + n_brothers > 0) { 6479 MD_CONTAINER* c = &ctx.containers[n_parents + n_brothers - 1]; 6480 if(c.ch != '>') { 6481 MD_BLOCK* block = cast(MD_BLOCK*) ((cast(char*)ctx.block_bytes) + c.block_byte_off); 6482 block.flags = block.flags | MD_BLOCK_LOOSE_LIST; 6483 } 6484 } 6485 6486 /* Leave any containers we are not part of anymore. */ 6487 if(n_children == 0 && n_parents + n_brothers < ctx.n_containers) 6488 { 6489 ret = (md_leave_child_containers(ctx, n_parents + n_brothers)); 6490 if (ret < 0) goto abort; 6491 } 6492 6493 /* Enter any container we found a mark for. 
*/
    if(n_brothers > 0) {
        assert(n_brothers == 1);
        ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                    ctx.containers[n_parents].task_mark_off,
                    (ctx.containers[n_parents].is_task ? ctx.CH(ctx.containers[n_parents].task_mark_off) : 0),
                    MD_BLOCK_CONTAINER_CLOSER));
        if (ret < 0) goto abort;
        ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                    container.task_mark_off,
                    (container.is_task ? ctx.CH(container.task_mark_off) : 0),
                    MD_BLOCK_CONTAINER_OPENER));
        if (ret < 0) goto abort;
        ctx.containers[n_parents].is_task = container.is_task;
        ctx.containers[n_parents].task_mark_off = container.task_mark_off;
    }

    if(n_children > 0)
    {
        ret = (md_enter_child_containers(ctx, n_children, line.data));
        if (ret < 0) goto abort;
    }

abort:
    return ret;
}

/* Feed one already analyzed line into the block builder.
 *
 * Depending on line.type the line either ends the current leaf block, forms
 * a single-line block of its own, changes the type of the current block
 * (Setext header, table), starts a new block, or is simply appended to the
 * current block.
 *
 * *p_pivot_line is updated to the line the following line has to be compared
 * against (the dummy blank line whenever no leaf block stays open).
 *
 * Returns 0 on the early-exit paths, otherwise the result of the last helper
 * call; a negative value means failure.
 */
int md_process_line(MD_CTX* ctx, const(MD_LINE_ANALYSIS)** p_pivot_line, MD_LINE_ANALYSIS* line)
{
    const(MD_LINE_ANALYSIS)* pivot_line = *p_pivot_line;
    int ret = 0;

    /* Blank line ends current leaf block. */
    if(line.type == MD_LINE_BLANK) {
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
        *p_pivot_line = &md_dummy_blank_line;
        return 0;
    }

    /* Some line types form block on their own. */
    if(line.type == MD_LINE_HR || line.type == MD_LINE_ATXHEADER) {
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;

        /* Add our single-line block. */
        ret = (md_start_new_block(ctx, line));
        if (ret < 0) goto abort;
        ret = (md_add_line_into_current_block(ctx, line));
        if (ret < 0) goto abort;
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
        *p_pivot_line = &md_dummy_blank_line;
        return 0;
    }

    /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
    if(line.type == MD_LINE_SETEXTUNDERLINE) {
        assert(ctx.current_block != null);
        ctx.current_block.type = MD_BLOCK_H;
        ctx.current_block.data = line.data;
        ctx.current_block.flags = ctx.current_block.flags | MD_BLOCK_SETEXT_HEADER;
        ret = (md_add_line_into_current_block(ctx, line));
        if (ret < 0) goto abort;
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
        if(ctx.current_block == null) {
            *p_pivot_line = &md_dummy_blank_line;
        } else {
            /* This happens if we have consumed all the body as link ref. defs.
             * and downgraded the underline into start of a new paragraph block. */
            line.type = MD_LINE_TEXT;
            *p_pivot_line = line;
        }
        return 0;
    }

    /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
    if(line.type == MD_LINE_TABLEUNDERLINE) {
        assert(ctx.current_block != null);
        assert(ctx.current_block.n_lines == 1);
        ctx.current_block.type = MD_BLOCK_TABLE;
        ctx.current_block.data = line.data;
        assert(pivot_line != &md_dummy_blank_line);
        /* The pivot is the caller's mutable line buffer (never the shared
         * dummy, see the assert above), so const is cast away to retag it. */
        (cast(MD_LINE_ANALYSIS*)pivot_line).type = MD_LINE_TABLE;
        ret = (md_add_line_into_current_block(ctx, line));
        if (ret < 0) goto abort;
        return 0;
    }

    /* The current block also ends if the line has different type. */
    if(line.type != pivot_line.type)
    {
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
    }

    /* The current line may start a new block. */
    if(ctx.current_block == null) {
        ret = (md_start_new_block(ctx, line));
        if (ret < 0) goto abort;
        *p_pivot_line = line;
    }

    /* In all other cases the line is just a continuation of the current block. */
    ret = (md_add_line_into_current_block(ctx, line));
    if (ret < 0) goto abort;

abort:
    return ret;
}

/* Parse the whole document held by ctx.
 *
 * Enters MD_BLOCK_DOC, analyzes and processes the input line by line, closes
 * the block left open at the end of input, builds the reference definition
 * hashtable, leaves all containers, processes all collected blocks and
 * finally leaves MD_BLOCK_DOC.
 *
 * Returns 0 on success; the first failing step aborts the sequence and its
 * return value is passed to the caller.
 */
int md_process_doc(MD_CTX *ctx)
{
    const(MD_LINE_ANALYSIS)* pivot_line = &md_dummy_blank_line;
    MD_LINE_ANALYSIS[2] line_buf;
    MD_LINE_ANALYSIS* line = &line_buf[0];
    OFF off = 0;
    int ret = 0;

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_DOC, null);
    if (ret != 0) goto abort;

    while(off < ctx.size) {
        /* The pivot line may live in line_buf; never analyze the next line
         * into the slot the pivot currently occupies. */
        if(line == pivot_line)
            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);

        ret = (md_analyze_line(ctx, off, &off, pivot_line, line));
        if (ret < 0) goto abort;
        ret = (md_process_line(ctx, &pivot_line, line));
        if (ret < 0) goto abort;
    }

    /* Close the block still open at the end of input. A failure here has to
     * abort the parsing just like a failure of any other step. */
    ret = (md_end_current_block(ctx));
    if (ret < 0) goto abort;

    ret = (md_build_ref_def_hashtable(ctx));
    if (ret < 0) goto abort;

    /* Process all blocks. */
    ret = (md_leave_child_containers(ctx, 0));
    if (ret < 0) goto abort;
    ret = (md_process_all_blocks(ctx));
    if (ret < 0) goto abort;

    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_DOC, null);
    if (ret != 0) goto abort;

abort:

    debug(bench)
    /* Output some memory consumption statistics.
     * NOTE(review): sprintf() is handed the static array itself rather than
     * buffer.ptr -- confirm this still builds with -debug=bench. */
    {
        char[256] buffer;
        sprintf(buffer, "Alloced %u bytes for block buffer.",
                    cast(uint)(ctx.alloc_block_bytes));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for containers buffer.",
                    cast(uint)(ctx.alloc_containers * MD_CONTAINER.sizeof));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for marks buffer.",
                    cast(uint)(ctx.alloc_marks * MD_MARK.sizeof));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for aux. buffer.",
                    cast(uint)(ctx.alloc_buffer * MD_CHAR.sizeof));
        ctx.MD_LOG(buffer);
    }

    return ret;
}


/********************
 ***  Public API  ***
 ********************/

/**
 * Parse the Markdown document stored in the string 'text' of size 'size'.
 * The renderer provides callbacks to be called during the parsing so the
 * caller can render the document on the screen or convert the Markdown
 * to another format.
 *
 * Zero is returned on success. If a runtime error occurs (e.g. a memory
 * allocation fails), -1 is returned; the same holds when 'parser' is null or
 * announces an unsupported abi_version. If the processing is aborted due to
 * any callback returning non-zero, the return value of that callback is
 * returned.
 */
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
{
    MD_CTX ctx;
    int i;
    int ret;

    /* Nothing can be parsed (or even logged) without a parser description. */
    if(parser == null)
        return -1;

    if(parser.abi_version != 0) {
        if(parser.debug_log != null)
            parser.debug_log("Unsupported abi_version.", userdata);
        return -1;
    }

    /* Setup context structure. */
    memset(&ctx, 0, MD_CTX.sizeof);
    ctx.text = text;
    ctx.size = size;
    memcpy(&ctx.parser, parser, MD_PARSER.sizeof);
    ctx.userdata = userdata;
    /* cast(OFF)(-1) serves as a threshold no indentation reaches when
     * indented code blocks are disabled (assuming OFF is unsigned). */
    ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? cast(OFF)(-1) : 4;
    md_build_mark_char_map(&ctx);
    ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));

    /* Reset all unresolved opener mark chains. */
    for(i = 0; i < cast(int) (ctx.mark_chains.length); i++) {
        ctx.mark_chains[i].head = -1;
        ctx.mark_chains[i].tail = -1;
    }
    ctx.unresolved_link_head = -1;
    ctx.unresolved_link_tail = -1;

    /* All the work. */
    ret = md_process_doc(&ctx);

    /* Clean-up. This runs regardless of the outcome of md_process_doc(). */
    md_free_ref_defs(&ctx);
    md_free_ref_def_hashtable(&ctx);
    free(ctx.buffer);
    free(ctx.marks);
    free(ctx.block_bytes);
    free(ctx.containers);

    return ret;
}

//
// HTML ENTITIES
//

/* Most entities are formed by single Unicode codepoint, few by two codepoints.
 * Single-codepoint entities have codepoints[1] set to zero. */
struct entity
{
    const(char)* name;
    uint[2] codepoints;
}

/* The table is generated from https://html.spec.whatwg.org/entities.json */
static immutable entity[] entity_table =
[
    entity( "Æ", [ 198, 0 ] ),
    entity( "&", [ 38, 0 ] ),
    entity( "Á", [ 193, 0 ] ),
    entity( "Ă", [ 258, 0 ] ),
    entity( "Â", [ 194, 0 ] ),
    entity( "А", [ 1040, 0 ] ),
    entity( "𝔄", [ 120068, 0 ] ),
    entity( "À", [ 192, 0 ] ),
    entity( "Α", [ 913, 0 ] ),
    entity( "Ā", [ 256, 0 ] ),
    entity( "⩓", [ 10835, 0 ] ),
    entity( "Ą", [ 260, 0 ] ),
    entity( "𝔸", [ 120120, 0 ] ),
    entity( "⁡", [ 8289, 0 ] ),
    entity( "Å", [ 197, 0 ] ),
    entity( "𝒜", [ 119964, 0 ] ),
    entity( "≔", [ 8788, 0 ] ),
    entity( "Ã", [ 195, 0 ] ),
    entity( "Ä", [ 196, 0 ] ),
    entity( "∖", [ 8726, 0 ] ),
    entity( "⫧", [ 10983, 0 ] ),
    entity( "⌆", [ 8966, 0 ] ),
    entity( "Б", [ 1041, 0 ] ),
    entity( "∵", [ 8757, 0 ] ),
    entity( "ℬ", [ 8492, 0 ] ),
    entity( "Β", [ 914, 0 ] ),
    entity( "𝔅", [ 120069, 0 ] ),
    entity( "𝔹", [ 120121, 0 ] ),
    entity( "˘", [ 728, 0 ] ),
    entity( "ℬ", [ 8492, 0 ] ),
    entity( "≎", [ 8782, 0 ] ),
    entity( "Ч", [ 1063, 0 ] ),
    entity( "©", [ 169, 0 ] ),
    entity( "Ć", [ 262, 0 ] ),
    entity( "⋒", [ 8914, 0 ] ),
    entity( "ⅅ", [ 8517, 0 ] ),
    entity( "ℭ", [ 8493, 0 ] ),
    entity( "Č", [ 268, 0 ] ),
    entity( "Ç", [ 199, 0 ] ),
    entity( "Ĉ", [ 264, 0 ] ),
    entity( "∰", [ 8752, 0 ] ),
    entity( "Ċ", [ 266, 0 ] ),
6782 entity( "¸", [ 184, 0 ] ), 6783 entity( "·", [ 183, 0 ] ), 6784 entity( "ℭ", [ 8493, 0 ] ), 6785 entity( "Χ", [ 935, 0 ] ), 6786 entity( "⊙", [ 8857, 0 ] ), 6787 entity( "⊖", [ 8854, 0 ] ), 6788 entity( "⊕", [ 8853, 0 ] ), 6789 entity( "⊗", [ 8855, 0 ] ), 6790 entity( "∲", [ 8754, 0 ] ), 6791 entity( "”", [ 8221, 0 ] ), 6792 entity( "’", [ 8217, 0 ] ), 6793 entity( "∷", [ 8759, 0 ] ), 6794 entity( "⩴", [ 10868, 0 ] ), 6795 entity( "≡", [ 8801, 0 ] ), 6796 entity( "∯", [ 8751, 0 ] ), 6797 entity( "∮", [ 8750, 0 ] ), 6798 entity( "ℂ", [ 8450, 0 ] ), 6799 entity( "∐", [ 8720, 0 ] ), 6800 entity( "∳", [ 8755, 0 ] ), 6801 entity( "⨯", [ 10799, 0 ] ), 6802 entity( "𝒞", [ 119966, 0 ] ), 6803 entity( "⋓", [ 8915, 0 ] ), 6804 entity( "≍", [ 8781, 0 ] ), 6805 entity( "ⅅ", [ 8517, 0 ] ), 6806 entity( "⤑", [ 10513, 0 ] ), 6807 entity( "Ђ", [ 1026, 0 ] ), 6808 entity( "Ѕ", [ 1029, 0 ] ), 6809 entity( "Џ", [ 1039, 0 ] ), 6810 entity( "‡", [ 8225, 0 ] ), 6811 entity( "↡", [ 8609, 0 ] ), 6812 entity( "⫤", [ 10980, 0 ] ), 6813 entity( "Ď", [ 270, 0 ] ), 6814 entity( "Д", [ 1044, 0 ] ), 6815 entity( "∇", [ 8711, 0 ] ), 6816 entity( "Δ", [ 916, 0 ] ), 6817 entity( "𝔇", [ 120071, 0 ] ), 6818 entity( "´", [ 180, 0 ] ), 6819 entity( "˙", [ 729, 0 ] ), 6820 entity( "˝", [ 733, 0 ] ), 6821 entity( "`", [ 96, 0 ] ), 6822 entity( "˜", [ 732, 0 ] ), 6823 entity( "⋄", [ 8900, 0 ] ), 6824 entity( "ⅆ", [ 8518, 0 ] ), 6825 entity( "𝔻", [ 120123, 0 ] ), 6826 entity( "¨", [ 168, 0 ] ), 6827 entity( "⃜", [ 8412, 0 ] ), 6828 entity( "≐", [ 8784, 0 ] ), 6829 entity( "∯", [ 8751, 0 ] ), 6830 entity( "¨", [ 168, 0 ] ), 6831 entity( "⇓", [ 8659, 0 ] ), 6832 entity( "⇐", [ 8656, 0 ] ), 6833 entity( "⇔", [ 8660, 0 ] ), 6834 entity( "⫤", [ 10980, 0 ] ), 6835 entity( "⟸", [ 10232, 0 ] ), 6836 entity( "⟺", [ 10234, 0 ] ), 6837 entity( "⟹", [ 10233, 0 ] ), 6838 entity( "⇒", [ 8658, 0 ] ), 6839 entity( "⊨", [ 8872, 0 ] ), 6840 entity( "⇑", [ 8657, 0 ] ), 6841 entity( "⇕", [ 8661, 0 ] ), 6842 entity( "∥", 
[ 8741, 0 ] ), 6843 entity( "↓", [ 8595, 0 ] ), 6844 entity( "⤓", [ 10515, 0 ] ), 6845 entity( "⇵", [ 8693, 0 ] ), 6846 entity( "̑", [ 785, 0 ] ), 6847 entity( "⥐", [ 10576, 0 ] ), 6848 entity( "⥞", [ 10590, 0 ] ), 6849 entity( "↽", [ 8637, 0 ] ), 6850 entity( "⥖", [ 10582, 0 ] ), 6851 entity( "⥟", [ 10591, 0 ] ), 6852 entity( "⇁", [ 8641, 0 ] ), 6853 entity( "⥗", [ 10583, 0 ] ), 6854 entity( "⊤", [ 8868, 0 ] ), 6855 entity( "↧", [ 8615, 0 ] ), 6856 entity( "⇓", [ 8659, 0 ] ), 6857 entity( "𝒟", [ 119967, 0 ] ), 6858 entity( "Đ", [ 272, 0 ] ), 6859 entity( "Ŋ", [ 330, 0 ] ), 6860 entity( "Ð", [ 208, 0 ] ), 6861 entity( "É", [ 201, 0 ] ), 6862 entity( "Ě", [ 282, 0 ] ), 6863 entity( "Ê", [ 202, 0 ] ), 6864 entity( "Э", [ 1069, 0 ] ), 6865 entity( "Ė", [ 278, 0 ] ), 6866 entity( "𝔈", [ 120072, 0 ] ), 6867 entity( "È", [ 200, 0 ] ), 6868 entity( "∈", [ 8712, 0 ] ), 6869 entity( "Ē", [ 274, 0 ] ), 6870 entity( "◻", [ 9723, 0 ] ), 6871 entity( "▫", [ 9643, 0 ] ), 6872 entity( "Ę", [ 280, 0 ] ), 6873 entity( "𝔼", [ 120124, 0 ] ), 6874 entity( "Ε", [ 917, 0 ] ), 6875 entity( "⩵", [ 10869, 0 ] ), 6876 entity( "≂", [ 8770, 0 ] ), 6877 entity( "⇌", [ 8652, 0 ] ), 6878 entity( "ℰ", [ 8496, 0 ] ), 6879 entity( "⩳", [ 10867, 0 ] ), 6880 entity( "Η", [ 919, 0 ] ), 6881 entity( "Ë", [ 203, 0 ] ), 6882 entity( "∃", [ 8707, 0 ] ), 6883 entity( "ⅇ", [ 8519, 0 ] ), 6884 entity( "Ф", [ 1060, 0 ] ), 6885 entity( "𝔉", [ 120073, 0 ] ), 6886 entity( "◼", [ 9724, 0 ] ), 6887 entity( "▪", [ 9642, 0 ] ), 6888 entity( "𝔽", [ 120125, 0 ] ), 6889 entity( "∀", [ 8704, 0 ] ), 6890 entity( "ℱ", [ 8497, 0 ] ), 6891 entity( "ℱ", [ 8497, 0 ] ), 6892 entity( "Ѓ", [ 1027, 0 ] ), 6893 entity( ">", [ 62, 0 ] ), 6894 entity( "Γ", [ 915, 0 ] ), 6895 entity( "Ϝ", [ 988, 0 ] ), 6896 entity( "Ğ", [ 286, 0 ] ), 6897 entity( "Ģ", [ 290, 0 ] ), 6898 entity( "Ĝ", [ 284, 0 ] ), 6899 entity( "Г", [ 1043, 0 ] ), 6900 entity( "Ġ", [ 288, 0 ] ), 6901 entity( "𝔊", [ 120074, 0 ] ), 6902 entity( "⋙", [ 8921, 0 ] ), 6903 
entity( "𝔾", [ 120126, 0 ] ), 6904 entity( "≥", [ 8805, 0 ] ), 6905 entity( "⋛", [ 8923, 0 ] ), 6906 entity( "≧", [ 8807, 0 ] ), 6907 entity( "⪢", [ 10914, 0 ] ), 6908 entity( "≷", [ 8823, 0 ] ), 6909 entity( "⩾", [ 10878, 0 ] ), 6910 entity( "≳", [ 8819, 0 ] ), 6911 entity( "𝒢", [ 119970, 0 ] ), 6912 entity( "≫", [ 8811, 0 ] ), 6913 entity( "Ъ", [ 1066, 0 ] ), 6914 entity( "ˇ", [ 711, 0 ] ), 6915 entity( "^", [ 94, 0 ] ), 6916 entity( "Ĥ", [ 292, 0 ] ), 6917 entity( "ℌ", [ 8460, 0 ] ), 6918 entity( "ℋ", [ 8459, 0 ] ), 6919 entity( "ℍ", [ 8461, 0 ] ), 6920 entity( "─", [ 9472, 0 ] ), 6921 entity( "ℋ", [ 8459, 0 ] ), 6922 entity( "Ħ", [ 294, 0 ] ), 6923 entity( "≎", [ 8782, 0 ] ), 6924 entity( "≏", [ 8783, 0 ] ), 6925 entity( "Е", [ 1045, 0 ] ), 6926 entity( "IJ", [ 306, 0 ] ), 6927 entity( "Ё", [ 1025, 0 ] ), 6928 entity( "Í", [ 205, 0 ] ), 6929 entity( "Î", [ 206, 0 ] ), 6930 entity( "И", [ 1048, 0 ] ), 6931 entity( "İ", [ 304, 0 ] ), 6932 entity( "ℑ", [ 8465, 0 ] ), 6933 entity( "Ì", [ 204, 0 ] ), 6934 entity( "ℑ", [ 8465, 0 ] ), 6935 entity( "Ī", [ 298, 0 ] ), 6936 entity( "ⅈ", [ 8520, 0 ] ), 6937 entity( "⇒", [ 8658, 0 ] ), 6938 entity( "∬", [ 8748, 0 ] ), 6939 entity( "∫", [ 8747, 0 ] ), 6940 entity( "⋂", [ 8898, 0 ] ), 6941 entity( "⁣", [ 8291, 0 ] ), 6942 entity( "⁢", [ 8290, 0 ] ), 6943 entity( "Į", [ 302, 0 ] ), 6944 entity( "𝕀", [ 120128, 0 ] ), 6945 entity( "Ι", [ 921, 0 ] ), 6946 entity( "ℐ", [ 8464, 0 ] ), 6947 entity( "Ĩ", [ 296, 0 ] ), 6948 entity( "І", [ 1030, 0 ] ), 6949 entity( "Ï", [ 207, 0 ] ), 6950 entity( "Ĵ", [ 308, 0 ] ), 6951 entity( "Й", [ 1049, 0 ] ), 6952 entity( "𝔍", [ 120077, 0 ] ), 6953 entity( "𝕁", [ 120129, 0 ] ), 6954 entity( "𝒥", [ 119973, 0 ] ), 6955 entity( "Ј", [ 1032, 0 ] ), 6956 entity( "Є", [ 1028, 0 ] ), 6957 entity( "Х", [ 1061, 0 ] ), 6958 entity( "Ќ", [ 1036, 0 ] ), 6959 entity( "Κ", [ 922, 0 ] ), 6960 entity( "Ķ", [ 310, 0 ] ), 6961 entity( "К", [ 1050, 0 ] ), 6962 entity( "𝔎", [ 120078, 0 ] ), 6963 entity( "𝕂", [ 
120130, 0 ] ), 6964 entity( "𝒦", [ 119974, 0 ] ), 6965 entity( "Љ", [ 1033, 0 ] ), 6966 entity( "<", [ 60, 0 ] ), 6967 entity( "Ĺ", [ 313, 0 ] ), 6968 entity( "Λ", [ 923, 0 ] ), 6969 entity( "⟪", [ 10218, 0 ] ), 6970 entity( "ℒ", [ 8466, 0 ] ), 6971 entity( "↞", [ 8606, 0 ] ), 6972 entity( "Ľ", [ 317, 0 ] ), 6973 entity( "Ļ", [ 315, 0 ] ), 6974 entity( "Л", [ 1051, 0 ] ), 6975 entity( "⟨", [ 10216, 0 ] ), 6976 entity( "←", [ 8592, 0 ] ), 6977 entity( "⇤", [ 8676, 0 ] ), 6978 entity( "⇆", [ 8646, 0 ] ), 6979 entity( "⌈", [ 8968, 0 ] ), 6980 entity( "⟦", [ 10214, 0 ] ), 6981 entity( "⥡", [ 10593, 0 ] ), 6982 entity( "⇃", [ 8643, 0 ] ), 6983 entity( "⥙", [ 10585, 0 ] ), 6984 entity( "⌊", [ 8970, 0 ] ), 6985 entity( "↔", [ 8596, 0 ] ), 6986 entity( "⥎", [ 10574, 0 ] ), 6987 entity( "⊣", [ 8867, 0 ] ), 6988 entity( "↤", [ 8612, 0 ] ), 6989 entity( "⥚", [ 10586, 0 ] ), 6990 entity( "⊲", [ 8882, 0 ] ), 6991 entity( "⧏", [ 10703, 0 ] ), 6992 entity( "⊴", [ 8884, 0 ] ), 6993 entity( "⥑", [ 10577, 0 ] ), 6994 entity( "⥠", [ 10592, 0 ] ), 6995 entity( "↿", [ 8639, 0 ] ), 6996 entity( "⥘", [ 10584, 0 ] ), 6997 entity( "↼", [ 8636, 0 ] ), 6998 entity( "⥒", [ 10578, 0 ] ), 6999 entity( "⇐", [ 8656, 0 ] ), 7000 entity( "⇔", [ 8660, 0 ] ), 7001 entity( "⋚", [ 8922, 0 ] ), 7002 entity( "≦", [ 8806, 0 ] ), 7003 entity( "≶", [ 8822, 0 ] ), 7004 entity( "⪡", [ 10913, 0 ] ), 7005 entity( "⩽", [ 10877, 0 ] ), 7006 entity( "≲", [ 8818, 0 ] ), 7007 entity( "𝔏", [ 120079, 0 ] ), 7008 entity( "⋘", [ 8920, 0 ] ), 7009 entity( "⇚", [ 8666, 0 ] ), 7010 entity( "Ŀ", [ 319, 0 ] ), 7011 entity( "⟵", [ 10229, 0 ] ), 7012 entity( "⟷", [ 10231, 0 ] ), 7013 entity( "⟶", [ 10230, 0 ] ), 7014 entity( "⟸", [ 10232, 0 ] ), 7015 entity( "⟺", [ 10234, 0 ] ), 7016 entity( "⟹", [ 10233, 0 ] ), 7017 entity( "𝕃", [ 120131, 0 ] ), 7018 entity( "↙", [ 8601, 0 ] ), 7019 entity( "↘", [ 8600, 0 ] ), 7020 entity( "ℒ", [ 8466, 0 ] ), 7021 entity( "↰", [ 8624, 0 ] ), 7022 entity( "Ł", [ 321, 0 ] ), 7023 entity( "≪", [ 
8810, 0 ] ), 7024 entity( "⤅", [ 10501, 0 ] ), 7025 entity( "М", [ 1052, 0 ] ), 7026 entity( " ", [ 8287, 0 ] ), 7027 entity( "ℳ", [ 8499, 0 ] ), 7028 entity( "𝔐", [ 120080, 0 ] ), 7029 entity( "∓", [ 8723, 0 ] ), 7030 entity( "𝕄", [ 120132, 0 ] ), 7031 entity( "ℳ", [ 8499, 0 ] ), 7032 entity( "Μ", [ 924, 0 ] ), 7033 entity( "Њ", [ 1034, 0 ] ), 7034 entity( "Ń", [ 323, 0 ] ), 7035 entity( "Ň", [ 327, 0 ] ), 7036 entity( "Ņ", [ 325, 0 ] ), 7037 entity( "Н", [ 1053, 0 ] ), 7038 entity( "​", [ 8203, 0 ] ), 7039 entity( "​", [ 8203, 0 ] ), 7040 entity( "​", [ 8203, 0 ] ), 7041 entity( "​", [ 8203, 0 ] ), 7042 entity( "≫", [ 8811, 0 ] ), 7043 entity( "≪", [ 8810, 0 ] ), 7044 entity( "
", [ 10, 0 ] ), 7045 entity( "𝔑", [ 120081, 0 ] ), 7046 entity( "⁠", [ 8288, 0 ] ), 7047 entity( " ", [ 160, 0 ] ), 7048 entity( "ℕ", [ 8469, 0 ] ), 7049 entity( "⫬", [ 10988, 0 ] ), 7050 entity( "≢", [ 8802, 0 ] ), 7051 entity( "≭", [ 8813, 0 ] ), 7052 entity( "∦", [ 8742, 0 ] ), 7053 entity( "∉", [ 8713, 0 ] ), 7054 entity( "≠", [ 8800, 0 ] ), 7055 entity( "≂̸", [ 8770, 824 ] ), 7056 entity( "∄", [ 8708, 0 ] ), 7057 entity( "≯", [ 8815, 0 ] ), 7058 entity( "≱", [ 8817, 0 ] ), 7059 entity( "≧̸", [ 8807, 824 ] ), 7060 entity( "≫̸", [ 8811, 824 ] ), 7061 entity( "≹", [ 8825, 0 ] ), 7062 entity( "⩾̸", [ 10878, 824 ] ), 7063 entity( "≵", [ 8821, 0 ] ), 7064 entity( "≎̸", [ 8782, 824 ] ), 7065 entity( "≏̸", [ 8783, 824 ] ), 7066 entity( "⋪", [ 8938, 0 ] ), 7067 entity( "⧏̸", [ 10703, 824 ] ), 7068 entity( "⋬", [ 8940, 0 ] ), 7069 entity( "≮", [ 8814, 0 ] ), 7070 entity( "≰", [ 8816, 0 ] ), 7071 entity( "≸", [ 8824, 0 ] ), 7072 entity( "≪̸", [ 8810, 824 ] ), 7073 entity( "⩽̸", [ 10877, 824 ] ), 7074 entity( "≴", [ 8820, 0 ] ), 7075 entity( "⪢̸", [ 10914, 824 ] ), 7076 entity( "⪡̸", [ 10913, 824 ] ), 7077 entity( "⊀", [ 8832, 0 ] ), 7078 entity( "⪯̸", [ 10927, 824 ] ), 7079 entity( "⋠", [ 8928, 0 ] ), 7080 entity( "∌", [ 8716, 0 ] ), 7081 entity( "⋫", [ 8939, 0 ] ), 7082 entity( "⧐̸", [ 10704, 824 ] ), 7083 entity( "⋭", [ 8941, 0 ] ), 7084 entity( "⊏̸", [ 8847, 824 ] ), 7085 entity( "⋢", [ 8930, 0 ] ), 7086 entity( "⊐̸", [ 8848, 824 ] ), 7087 entity( "⋣", [ 8931, 0 ] ), 7088 entity( "⊂⃒", [ 8834, 8402 ] ), 7089 entity( "⊈", [ 8840, 0 ] ), 7090 entity( "⊁", [ 8833, 0 ] ), 7091 entity( "⪰̸", [ 10928, 824 ] ), 7092 entity( "⋡", [ 8929, 0 ] ), 7093 entity( "≿̸", [ 8831, 824 ] ), 7094 entity( "⊃⃒", [ 8835, 8402 ] ), 7095 entity( "⊉", [ 8841, 0 ] ), 7096 entity( "≁", [ 8769, 0 ] ), 7097 entity( "≄", [ 8772, 0 ] ), 7098 entity( "≇", [ 8775, 0 ] ), 7099 entity( "≉", [ 8777, 0 ] ), 7100 entity( "∤", [ 8740, 0 ] ), 7101 entity( "𝒩", [ 119977, 0 ] ), 7102 entity( "Ñ", [ 209, 0 ] ), 
7103 entity( "Ν", [ 925, 0 ] ), 7104 entity( "Œ", [ 338, 0 ] ), 7105 entity( "Ó", [ 211, 0 ] ), 7106 entity( "Ô", [ 212, 0 ] ), 7107 entity( "О", [ 1054, 0 ] ), 7108 entity( "Ő", [ 336, 0 ] ), 7109 entity( "𝔒", [ 120082, 0 ] ), 7110 entity( "Ò", [ 210, 0 ] ), 7111 entity( "Ō", [ 332, 0 ] ), 7112 entity( "Ω", [ 937, 0 ] ), 7113 entity( "Ο", [ 927, 0 ] ), 7114 entity( "𝕆", [ 120134, 0 ] ), 7115 entity( "“", [ 8220, 0 ] ), 7116 entity( "‘", [ 8216, 0 ] ), 7117 entity( "⩔", [ 10836, 0 ] ), 7118 entity( "𝒪", [ 119978, 0 ] ), 7119 entity( "Ø", [ 216, 0 ] ), 7120 entity( "Õ", [ 213, 0 ] ), 7121 entity( "⨷", [ 10807, 0 ] ), 7122 entity( "Ö", [ 214, 0 ] ), 7123 entity( "‾", [ 8254, 0 ] ), 7124 entity( "⏞", [ 9182, 0 ] ), 7125 entity( "⎴", [ 9140, 0 ] ), 7126 entity( "⏜", [ 9180, 0 ] ), 7127 entity( "∂", [ 8706, 0 ] ), 7128 entity( "П", [ 1055, 0 ] ), 7129 entity( "𝔓", [ 120083, 0 ] ), 7130 entity( "Φ", [ 934, 0 ] ), 7131 entity( "Π", [ 928, 0 ] ), 7132 entity( "±", [ 177, 0 ] ), 7133 entity( "ℌ", [ 8460, 0 ] ), 7134 entity( "ℙ", [ 8473, 0 ] ), 7135 entity( "⪻", [ 10939, 0 ] ), 7136 entity( "≺", [ 8826, 0 ] ), 7137 entity( "⪯", [ 10927, 0 ] ), 7138 entity( "≼", [ 8828, 0 ] ), 7139 entity( "≾", [ 8830, 0 ] ), 7140 entity( "″", [ 8243, 0 ] ), 7141 entity( "∏", [ 8719, 0 ] ), 7142 entity( "∷", [ 8759, 0 ] ), 7143 entity( "∝", [ 8733, 0 ] ), 7144 entity( "𝒫", [ 119979, 0 ] ), 7145 entity( "Ψ", [ 936, 0 ] ), 7146 entity( """, [ 34, 0 ] ), 7147 entity( "𝔔", [ 120084, 0 ] ), 7148 entity( "ℚ", [ 8474, 0 ] ), 7149 entity( "𝒬", [ 119980, 0 ] ), 7150 entity( "⤐", [ 10512, 0 ] ), 7151 entity( "®", [ 174, 0 ] ), 7152 entity( "Ŕ", [ 340, 0 ] ), 7153 entity( "⟫", [ 10219, 0 ] ), 7154 entity( "↠", [ 8608, 0 ] ), 7155 entity( "⤖", [ 10518, 0 ] ), 7156 entity( "Ř", [ 344, 0 ] ), 7157 entity( "Ŗ", [ 342, 0 ] ), 7158 entity( "Р", [ 1056, 0 ] ), 7159 entity( "ℜ", [ 8476, 0 ] ), 7160 entity( "∋", [ 8715, 0 ] ), 7161 entity( "⇋", [ 8651, 0 ] ), 7162 entity( "⥯", [ 10607, 0 ] ), 7163 entity( "ℜ", [ 
8476, 0 ] ), 7164 entity( "Ρ", [ 929, 0 ] ), 7165 entity( "⟩", [ 10217, 0 ] ), 7166 entity( "→", [ 8594, 0 ] ), 7167 entity( "⇥", [ 8677, 0 ] ), 7168 entity( "⇄", [ 8644, 0 ] ), 7169 entity( "⌉", [ 8969, 0 ] ), 7170 entity( "⟧", [ 10215, 0 ] ), 7171 entity( "⥝", [ 10589, 0 ] ), 7172 entity( "⇂", [ 8642, 0 ] ), 7173 entity( "⥕", [ 10581, 0 ] ), 7174 entity( "⌋", [ 8971, 0 ] ), 7175 entity( "⊢", [ 8866, 0 ] ), 7176 entity( "↦", [ 8614, 0 ] ), 7177 entity( "⥛", [ 10587, 0 ] ), 7178 entity( "⊳", [ 8883, 0 ] ), 7179 entity( "⧐", [ 10704, 0 ] ), 7180 entity( "⊵", [ 8885, 0 ] ), 7181 entity( "⥏", [ 10575, 0 ] ), 7182 entity( "⥜", [ 10588, 0 ] ), 7183 entity( "↾", [ 8638, 0 ] ), 7184 entity( "⥔", [ 10580, 0 ] ), 7185 entity( "⇀", [ 8640, 0 ] ), 7186 entity( "⥓", [ 10579, 0 ] ), 7187 entity( "⇒", [ 8658, 0 ] ), 7188 entity( "ℝ", [ 8477, 0 ] ), 7189 entity( "⥰", [ 10608, 0 ] ), 7190 entity( "⇛", [ 8667, 0 ] ), 7191 entity( "ℛ", [ 8475, 0 ] ), 7192 entity( "↱", [ 8625, 0 ] ), 7193 entity( "⧴", [ 10740, 0 ] ), 7194 entity( "Щ", [ 1065, 0 ] ), 7195 entity( "Ш", [ 1064, 0 ] ), 7196 entity( "Ь", [ 1068, 0 ] ), 7197 entity( "Ś", [ 346, 0 ] ), 7198 entity( "⪼", [ 10940, 0 ] ), 7199 entity( "Š", [ 352, 0 ] ), 7200 entity( "Ş", [ 350, 0 ] ), 7201 entity( "Ŝ", [ 348, 0 ] ), 7202 entity( "С", [ 1057, 0 ] ), 7203 entity( "𝔖", [ 120086, 0 ] ), 7204 entity( "↓", [ 8595, 0 ] ), 7205 entity( "←", [ 8592, 0 ] ), 7206 entity( "→", [ 8594, 0 ] ), 7207 entity( "↑", [ 8593, 0 ] ), 7208 entity( "Σ", [ 931, 0 ] ), 7209 entity( "∘", [ 8728, 0 ] ), 7210 entity( "𝕊", [ 120138, 0 ] ), 7211 entity( "√", [ 8730, 0 ] ), 7212 entity( "□", [ 9633, 0 ] ), 7213 entity( "⊓", [ 8851, 0 ] ), 7214 entity( "⊏", [ 8847, 0 ] ), 7215 entity( "⊑", [ 8849, 0 ] ), 7216 entity( "⊐", [ 8848, 0 ] ), 7217 entity( "⊒", [ 8850, 0 ] ), 7218 entity( "⊔", [ 8852, 0 ] ), 7219 entity( "𝒮", [ 119982, 0 ] ), 7220 entity( "⋆", [ 8902, 0 ] ), 7221 entity( "⋐", [ 8912, 0 ] ), 7222 entity( "⋐", [ 8912, 0 ] ), 7223 entity( "⊆", [ 8838, 
0 ] ), 7224 entity( "≻", [ 8827, 0 ] ), 7225 entity( "⪰", [ 10928, 0 ] ), 7226 entity( "≽", [ 8829, 0 ] ), 7227 entity( "≿", [ 8831, 0 ] ), 7228 entity( "∋", [ 8715, 0 ] ), 7229 entity( "∑", [ 8721, 0 ] ), 7230 entity( "⋑", [ 8913, 0 ] ), 7231 entity( "⊃", [ 8835, 0 ] ), 7232 entity( "⊇", [ 8839, 0 ] ), 7233 entity( "⋑", [ 8913, 0 ] ), 7234 entity( "Þ", [ 222, 0 ] ), 7235 entity( "™", [ 8482, 0 ] ), 7236 entity( "Ћ", [ 1035, 0 ] ), 7237 entity( "Ц", [ 1062, 0 ] ), 7238 entity( "	", [ 9, 0 ] ), 7239 entity( "Τ", [ 932, 0 ] ), 7240 entity( "Ť", [ 356, 0 ] ), 7241 entity( "Ţ", [ 354, 0 ] ), 7242 entity( "Т", [ 1058, 0 ] ), 7243 entity( "𝔗", [ 120087, 0 ] ), 7244 entity( "∴", [ 8756, 0 ] ), 7245 entity( "Θ", [ 920, 0 ] ), 7246 entity( "  ", [ 8287, 8202 ] ), 7247 entity( " ", [ 8201, 0 ] ), 7248 entity( "∼", [ 8764, 0 ] ), 7249 entity( "≃", [ 8771, 0 ] ), 7250 entity( "≅", [ 8773, 0 ] ), 7251 entity( "≈", [ 8776, 0 ] ), 7252 entity( "𝕋", [ 120139, 0 ] ), 7253 entity( "⃛", [ 8411, 0 ] ), 7254 entity( "𝒯", [ 119983, 0 ] ), 7255 entity( "Ŧ", [ 358, 0 ] ), 7256 entity( "Ú", [ 218, 0 ] ), 7257 entity( "↟", [ 8607, 0 ] ), 7258 entity( "⥉", [ 10569, 0 ] ), 7259 entity( "Ў", [ 1038, 0 ] ), 7260 entity( "Ŭ", [ 364, 0 ] ), 7261 entity( "Û", [ 219, 0 ] ), 7262 entity( "У", [ 1059, 0 ] ), 7263 entity( "Ű", [ 368, 0 ] ), 7264 entity( "𝔘", [ 120088, 0 ] ), 7265 entity( "Ù", [ 217, 0 ] ), 7266 entity( "Ū", [ 362, 0 ] ), 7267 entity( "_", [ 95, 0 ] ), 7268 entity( "⏟", [ 9183, 0 ] ), 7269 entity( "⎵", [ 9141, 0 ] ), 7270 entity( "⏝", [ 9181, 0 ] ), 7271 entity( "⋃", [ 8899, 0 ] ), 7272 entity( "⊎", [ 8846, 0 ] ), 7273 entity( "Ų", [ 370, 0 ] ), 7274 entity( "𝕌", [ 120140, 0 ] ), 7275 entity( "↑", [ 8593, 0 ] ), 7276 entity( "⤒", [ 10514, 0 ] ), 7277 entity( "⇅", [ 8645, 0 ] ), 7278 entity( "↕", [ 8597, 0 ] ), 7279 entity( "⥮", [ 10606, 0 ] ), 7280 entity( "⊥", [ 8869, 0 ] ), 7281 entity( "↥", [ 8613, 0 ] ), 7282 entity( "⇑", [ 8657, 0 ] ), 7283 entity( "⇕", [ 8661, 0 ] ), 7284 entity( 
"↖", [ 8598, 0 ] ), 7285 entity( "↗", [ 8599, 0 ] ), 7286 entity( "ϒ", [ 978, 0 ] ), 7287 entity( "Υ", [ 933, 0 ] ), 7288 entity( "Ů", [ 366, 0 ] ), 7289 entity( "𝒰", [ 119984, 0 ] ), 7290 entity( "Ũ", [ 360, 0 ] ), 7291 entity( "Ü", [ 220, 0 ] ), 7292 entity( "⊫", [ 8875, 0 ] ), 7293 entity( "⫫", [ 10987, 0 ] ), 7294 entity( "В", [ 1042, 0 ] ), 7295 entity( "⊩", [ 8873, 0 ] ), 7296 entity( "⫦", [ 10982, 0 ] ), 7297 entity( "⋁", [ 8897, 0 ] ), 7298 entity( "‖", [ 8214, 0 ] ), 7299 entity( "‖", [ 8214, 0 ] ), 7300 entity( "∣", [ 8739, 0 ] ), 7301 entity( "|", [ 124, 0 ] ), 7302 entity( "❘", [ 10072, 0 ] ), 7303 entity( "≀", [ 8768, 0 ] ), 7304 entity( " ", [ 8202, 0 ] ), 7305 entity( "𝔙", [ 120089, 0 ] ), 7306 entity( "𝕍", [ 120141, 0 ] ), 7307 entity( "𝒱", [ 119985, 0 ] ), 7308 entity( "⊪", [ 8874, 0 ] ), 7309 entity( "Ŵ", [ 372, 0 ] ), 7310 entity( "⋀", [ 8896, 0 ] ), 7311 entity( "𝔚", [ 120090, 0 ] ), 7312 entity( "𝕎", [ 120142, 0 ] ), 7313 entity( "𝒲", [ 119986, 0 ] ), 7314 entity( "𝔛", [ 120091, 0 ] ), 7315 entity( "Ξ", [ 926, 0 ] ), 7316 entity( "𝕏", [ 120143, 0 ] ), 7317 entity( "𝒳", [ 119987, 0 ] ), 7318 entity( "Я", [ 1071, 0 ] ), 7319 entity( "Ї", [ 1031, 0 ] ), 7320 entity( "Ю", [ 1070, 0 ] ), 7321 entity( "Ý", [ 221, 0 ] ), 7322 entity( "Ŷ", [ 374, 0 ] ), 7323 entity( "Ы", [ 1067, 0 ] ), 7324 entity( "𝔜", [ 120092, 0 ] ), 7325 entity( "𝕐", [ 120144, 0 ] ), 7326 entity( "𝒴", [ 119988, 0 ] ), 7327 entity( "Ÿ", [ 376, 0 ] ), 7328 entity( "Ж", [ 1046, 0 ] ), 7329 entity( "Ź", [ 377, 0 ] ), 7330 entity( "Ž", [ 381, 0 ] ), 7331 entity( "З", [ 1047, 0 ] ), 7332 entity( "Ż", [ 379, 0 ] ), 7333 entity( "​", [ 8203, 0 ] ), 7334 entity( "Ζ", [ 918, 0 ] ), 7335 entity( "ℨ", [ 8488, 0 ] ), 7336 entity( "ℤ", [ 8484, 0 ] ), 7337 entity( "𝒵", [ 119989, 0 ] ), 7338 entity( "á", [ 225, 0 ] ), 7339 entity( "ă", [ 259, 0 ] ), 7340 entity( "∾", [ 8766, 0 ] ), 7341 entity( "∾̳", [ 8766, 819 ] ), 7342 entity( "∿", [ 8767, 0 ] ), 7343 entity( "â", [ 226, 0 ] ), 7344 entity( 
"´", [ 180, 0 ] ), 7345 entity( "а", [ 1072, 0 ] ), 7346 entity( "æ", [ 230, 0 ] ), 7347 entity( "⁡", [ 8289, 0 ] ), 7348 entity( "𝔞", [ 120094, 0 ] ), 7349 entity( "à", [ 224, 0 ] ), 7350 entity( "ℵ", [ 8501, 0 ] ), 7351 entity( "ℵ", [ 8501, 0 ] ), 7352 entity( "α", [ 945, 0 ] ), 7353 entity( "ā", [ 257, 0 ] ), 7354 entity( "⨿", [ 10815, 0 ] ), 7355 entity( "&", [ 38, 0 ] ), 7356 entity( "∧", [ 8743, 0 ] ), 7357 entity( "⩕", [ 10837, 0 ] ), 7358 entity( "⩜", [ 10844, 0 ] ), 7359 entity( "⩘", [ 10840, 0 ] ), 7360 entity( "⩚", [ 10842, 0 ] ), 7361 entity( "∠", [ 8736, 0 ] ), 7362 entity( "⦤", [ 10660, 0 ] ), 7363 entity( "∠", [ 8736, 0 ] ), 7364 entity( "∡", [ 8737, 0 ] ), 7365 entity( "⦨", [ 10664, 0 ] ), 7366 entity( "⦩", [ 10665, 0 ] ), 7367 entity( "⦪", [ 10666, 0 ] ), 7368 entity( "⦫", [ 10667, 0 ] ), 7369 entity( "⦬", [ 10668, 0 ] ), 7370 entity( "⦭", [ 10669, 0 ] ), 7371 entity( "⦮", [ 10670, 0 ] ), 7372 entity( "⦯", [ 10671, 0 ] ), 7373 entity( "∟", [ 8735, 0 ] ), 7374 entity( "⊾", [ 8894, 0 ] ), 7375 entity( "⦝", [ 10653, 0 ] ), 7376 entity( "∢", [ 8738, 0 ] ), 7377 entity( "Å", [ 197, 0 ] ), 7378 entity( "⍼", [ 9084, 0 ] ), 7379 entity( "ą", [ 261, 0 ] ), 7380 entity( "𝕒", [ 120146, 0 ] ), 7381 entity( "≈", [ 8776, 0 ] ), 7382 entity( "⩰", [ 10864, 0 ] ), 7383 entity( "⩯", [ 10863, 0 ] ), 7384 entity( "≊", [ 8778, 0 ] ), 7385 entity( "≋", [ 8779, 0 ] ), 7386 entity( "'", [ 39, 0 ] ), 7387 entity( "≈", [ 8776, 0 ] ), 7388 entity( "≊", [ 8778, 0 ] ), 7389 entity( "å", [ 229, 0 ] ), 7390 entity( "𝒶", [ 119990, 0 ] ), 7391 entity( "*", [ 42, 0 ] ), 7392 entity( "≈", [ 8776, 0 ] ), 7393 entity( "≍", [ 8781, 0 ] ), 7394 entity( "ã", [ 227, 0 ] ), 7395 entity( "ä", [ 228, 0 ] ), 7396 entity( "∳", [ 8755, 0 ] ), 7397 entity( "⨑", [ 10769, 0 ] ), 7398 entity( "⫭", [ 10989, 0 ] ), 7399 entity( "≌", [ 8780, 0 ] ), 7400 entity( "϶", [ 1014, 0 ] ), 7401 entity( "‵", [ 8245, 0 ] ), 7402 entity( "∽", [ 8765, 0 ] ), 7403 entity( "⋍", [ 8909, 0 ] ), 7404 entity( "⊽", [ 
8893, 0 ] ), 7405 entity( "⌅", [ 8965, 0 ] ), 7406 entity( "⌅", [ 8965, 0 ] ), 7407 entity( "⎵", [ 9141, 0 ] ), 7408 entity( "⎶", [ 9142, 0 ] ), 7409 entity( "≌", [ 8780, 0 ] ), 7410 entity( "б", [ 1073, 0 ] ), 7411 entity( "„", [ 8222, 0 ] ), 7412 entity( "∵", [ 8757, 0 ] ), 7413 entity( "∵", [ 8757, 0 ] ), 7414 entity( "⦰", [ 10672, 0 ] ), 7415 entity( "϶", [ 1014, 0 ] ), 7416 entity( "ℬ", [ 8492, 0 ] ), 7417 entity( "β", [ 946, 0 ] ), 7418 entity( "ℶ", [ 8502, 0 ] ), 7419 entity( "≬", [ 8812, 0 ] ), 7420 entity( "𝔟", [ 120095, 0 ] ), 7421 entity( "⋂", [ 8898, 0 ] ), 7422 entity( "◯", [ 9711, 0 ] ), 7423 entity( "⋃", [ 8899, 0 ] ), 7424 entity( "⨀", [ 10752, 0 ] ), 7425 entity( "⨁", [ 10753, 0 ] ), 7426 entity( "⨂", [ 10754, 0 ] ), 7427 entity( "⨆", [ 10758, 0 ] ), 7428 entity( "★", [ 9733, 0 ] ), 7429 entity( "▽", [ 9661, 0 ] ), 7430 entity( "△", [ 9651, 0 ] ), 7431 entity( "⨄", [ 10756, 0 ] ), 7432 entity( "⋁", [ 8897, 0 ] ), 7433 entity( "⋀", [ 8896, 0 ] ), 7434 entity( "⤍", [ 10509, 0 ] ), 7435 entity( "⧫", [ 10731, 0 ] ), 7436 entity( "▪", [ 9642, 0 ] ), 7437 entity( "▴", [ 9652, 0 ] ), 7438 entity( "▾", [ 9662, 0 ] ), 7439 entity( "◂", [ 9666, 0 ] ), 7440 entity( "▸", [ 9656, 0 ] ), 7441 entity( "␣", [ 9251, 0 ] ), 7442 entity( "▒", [ 9618, 0 ] ), 7443 entity( "░", [ 9617, 0 ] ), 7444 entity( "▓", [ 9619, 0 ] ), 7445 entity( "█", [ 9608, 0 ] ), 7446 entity( "=⃥", [ 61, 8421 ] ), 7447 entity( "≡⃥", [ 8801, 8421 ] ), 7448 entity( "⌐", [ 8976, 0 ] ), 7449 entity( "𝕓", [ 120147, 0 ] ), 7450 entity( "⊥", [ 8869, 0 ] ), 7451 entity( "⊥", [ 8869, 0 ] ), 7452 entity( "⋈", [ 8904, 0 ] ), 7453 entity( "╗", [ 9559, 0 ] ), 7454 entity( "╔", [ 9556, 0 ] ), 7455 entity( "╖", [ 9558, 0 ] ), 7456 entity( "╓", [ 9555, 0 ] ), 7457 entity( "═", [ 9552, 0 ] ), 7458 entity( "╦", [ 9574, 0 ] ), 7459 entity( "╩", [ 9577, 0 ] ), 7460 entity( "╤", [ 9572, 0 ] ), 7461 entity( "╧", [ 9575, 0 ] ), 7462 entity( "╝", [ 9565, 0 ] ), 7463 entity( "╚", [ 9562, 0 ] ), 7464 entity( "╜", [ 
9564, 0 ] ), 7465 entity( "╙", [ 9561, 0 ] ), 7466 entity( "║", [ 9553, 0 ] ), 7467 entity( "╬", [ 9580, 0 ] ), 7468 entity( "╣", [ 9571, 0 ] ), 7469 entity( "╠", [ 9568, 0 ] ), 7470 entity( "╫", [ 9579, 0 ] ), 7471 entity( "╢", [ 9570, 0 ] ), 7472 entity( "╟", [ 9567, 0 ] ), 7473 entity( "⧉", [ 10697, 0 ] ), 7474 entity( "╕", [ 9557, 0 ] ), 7475 entity( "╒", [ 9554, 0 ] ), 7476 entity( "┐", [ 9488, 0 ] ), 7477 entity( "┌", [ 9484, 0 ] ), 7478 entity( "─", [ 9472, 0 ] ), 7479 entity( "╥", [ 9573, 0 ] ), 7480 entity( "╨", [ 9576, 0 ] ), 7481 entity( "┬", [ 9516, 0 ] ), 7482 entity( "┴", [ 9524, 0 ] ), 7483 entity( "⊟", [ 8863, 0 ] ), 7484 entity( "⊞", [ 8862, 0 ] ), 7485 entity( "⊠", [ 8864, 0 ] ), 7486 entity( "╛", [ 9563, 0 ] ), 7487 entity( "╘", [ 9560, 0 ] ), 7488 entity( "┘", [ 9496, 0 ] ), 7489 entity( "└", [ 9492, 0 ] ), 7490 entity( "│", [ 9474, 0 ] ), 7491 entity( "╪", [ 9578, 0 ] ), 7492 entity( "╡", [ 9569, 0 ] ), 7493 entity( "╞", [ 9566, 0 ] ), 7494 entity( "┼", [ 9532, 0 ] ), 7495 entity( "┤", [ 9508, 0 ] ), 7496 entity( "├", [ 9500, 0 ] ), 7497 entity( "‵", [ 8245, 0 ] ), 7498 entity( "˘", [ 728, 0 ] ), 7499 entity( "¦", [ 166, 0 ] ), 7500 entity( "𝒷", [ 119991, 0 ] ), 7501 entity( "⁏", [ 8271, 0 ] ), 7502 entity( "∽", [ 8765, 0 ] ), 7503 entity( "⋍", [ 8909, 0 ] ), 7504 entity( "\", [ 92, 0 ] ), 7505 entity( "⧅", [ 10693, 0 ] ), 7506 entity( "⟈", [ 10184, 0 ] ), 7507 entity( "•", [ 8226, 0 ] ), 7508 entity( "•", [ 8226, 0 ] ), 7509 entity( "≎", [ 8782, 0 ] ), 7510 entity( "⪮", [ 10926, 0 ] ), 7511 entity( "≏", [ 8783, 0 ] ), 7512 entity( "≏", [ 8783, 0 ] ), 7513 entity( "ć", [ 263, 0 ] ), 7514 entity( "∩", [ 8745, 0 ] ), 7515 entity( "⩄", [ 10820, 0 ] ), 7516 entity( "⩉", [ 10825, 0 ] ), 7517 entity( "⩋", [ 10827, 0 ] ), 7518 entity( "⩇", [ 10823, 0 ] ), 7519 entity( "⩀", [ 10816, 0 ] ), 7520 entity( "∩︀", [ 8745, 65024 ] ), 7521 entity( "⁁", [ 8257, 0 ] ), 7522 entity( "ˇ", [ 711, 0 ] ), 7523 entity( "⩍", [ 10829, 0 ] ), 7524 entity( "č", [ 269, 0 ] 
), 7525 entity( "ç", [ 231, 0 ] ), 7526 entity( "ĉ", [ 265, 0 ] ), 7527 entity( "⩌", [ 10828, 0 ] ), 7528 entity( "⩐", [ 10832, 0 ] ), 7529 entity( "ċ", [ 267, 0 ] ), 7530 entity( "¸", [ 184, 0 ] ), 7531 entity( "⦲", [ 10674, 0 ] ), 7532 entity( "¢", [ 162, 0 ] ), 7533 entity( "·", [ 183, 0 ] ), 7534 entity( "𝔠", [ 120096, 0 ] ), 7535 entity( "ч", [ 1095, 0 ] ), 7536 entity( "✓", [ 10003, 0 ] ), 7537 entity( "✓", [ 10003, 0 ] ), 7538 entity( "χ", [ 967, 0 ] ), 7539 entity( "○", [ 9675, 0 ] ), 7540 entity( "⧃", [ 10691, 0 ] ), 7541 entity( "ˆ", [ 710, 0 ] ), 7542 entity( "≗", [ 8791, 0 ] ), 7543 entity( "↺", [ 8634, 0 ] ), 7544 entity( "↻", [ 8635, 0 ] ), 7545 entity( "®", [ 174, 0 ] ), 7546 entity( "Ⓢ", [ 9416, 0 ] ), 7547 entity( "⊛", [ 8859, 0 ] ), 7548 entity( "⊚", [ 8858, 0 ] ), 7549 entity( "⊝", [ 8861, 0 ] ), 7550 entity( "≗", [ 8791, 0 ] ), 7551 entity( "⨐", [ 10768, 0 ] ), 7552 entity( "⫯", [ 10991, 0 ] ), 7553 entity( "⧂", [ 10690, 0 ] ), 7554 entity( "♣", [ 9827, 0 ] ), 7555 entity( "♣", [ 9827, 0 ] ), 7556 entity( ":", [ 58, 0 ] ), 7557 entity( "≔", [ 8788, 0 ] ), 7558 entity( "≔", [ 8788, 0 ] ), 7559 entity( ",", [ 44, 0 ] ), 7560 entity( "@", [ 64, 0 ] ), 7561 entity( "∁", [ 8705, 0 ] ), 7562 entity( "∘", [ 8728, 0 ] ), 7563 entity( "∁", [ 8705, 0 ] ), 7564 entity( "ℂ", [ 8450, 0 ] ), 7565 entity( "≅", [ 8773, 0 ] ), 7566 entity( "⩭", [ 10861, 0 ] ), 7567 entity( "∮", [ 8750, 0 ] ), 7568 entity( "𝕔", [ 120148, 0 ] ), 7569 entity( "∐", [ 8720, 0 ] ), 7570 entity( "©", [ 169, 0 ] ), 7571 entity( "℗", [ 8471, 0 ] ), 7572 entity( "↵", [ 8629, 0 ] ), 7573 entity( "✗", [ 10007, 0 ] ), 7574 entity( "𝒸", [ 119992, 0 ] ), 7575 entity( "⫏", [ 10959, 0 ] ), 7576 entity( "⫑", [ 10961, 0 ] ), 7577 entity( "⫐", [ 10960, 0 ] ), 7578 entity( "⫒", [ 10962, 0 ] ), 7579 entity( "⋯", [ 8943, 0 ] ), 7580 entity( "⤸", [ 10552, 0 ] ), 7581 entity( "⤵", [ 10549, 0 ] ), 7582 entity( "⋞", [ 8926, 0 ] ), 7583 entity( "⋟", [ 8927, 0 ] ), 7584 entity( "↶", [ 8630, 0 ] ), 7585 
entity( "⤽", [ 10557, 0 ] ), 7586 entity( "∪", [ 8746, 0 ] ), 7587 entity( "⩈", [ 10824, 0 ] ), 7588 entity( "⩆", [ 10822, 0 ] ), 7589 entity( "⩊", [ 10826, 0 ] ), 7590 entity( "⊍", [ 8845, 0 ] ), 7591 entity( "⩅", [ 10821, 0 ] ), 7592 entity( "∪︀", [ 8746, 65024 ] ), 7593 entity( "↷", [ 8631, 0 ] ), 7594 entity( "⤼", [ 10556, 0 ] ), 7595 entity( "⋞", [ 8926, 0 ] ), 7596 entity( "⋟", [ 8927, 0 ] ), 7597 entity( "⋎", [ 8910, 0 ] ), 7598 entity( "⋏", [ 8911, 0 ] ), 7599 entity( "¤", [ 164, 0 ] ), 7600 entity( "↶", [ 8630, 0 ] ), 7601 entity( "↷", [ 8631, 0 ] ), 7602 entity( "⋎", [ 8910, 0 ] ), 7603 entity( "⋏", [ 8911, 0 ] ), 7604 entity( "∲", [ 8754, 0 ] ), 7605 entity( "∱", [ 8753, 0 ] ), 7606 entity( "⌭", [ 9005, 0 ] ), 7607 entity( "⇓", [ 8659, 0 ] ), 7608 entity( "⥥", [ 10597, 0 ] ), 7609 entity( "†", [ 8224, 0 ] ), 7610 entity( "ℸ", [ 8504, 0 ] ), 7611 entity( "↓", [ 8595, 0 ] ), 7612 entity( "‐", [ 8208, 0 ] ), 7613 entity( "⊣", [ 8867, 0 ] ), 7614 entity( "⤏", [ 10511, 0 ] ), 7615 entity( "˝", [ 733, 0 ] ), 7616 entity( "ď", [ 271, 0 ] ), 7617 entity( "д", [ 1076, 0 ] ), 7618 entity( "ⅆ", [ 8518, 0 ] ), 7619 entity( "‡", [ 8225, 0 ] ), 7620 entity( "⇊", [ 8650, 0 ] ), 7621 entity( "⩷", [ 10871, 0 ] ), 7622 entity( "°", [ 176, 0 ] ), 7623 entity( "δ", [ 948, 0 ] ), 7624 entity( "⦱", [ 10673, 0 ] ), 7625 entity( "⥿", [ 10623, 0 ] ), 7626 entity( "𝔡", [ 120097, 0 ] ), 7627 entity( "⇃", [ 8643, 0 ] ), 7628 entity( "⇂", [ 8642, 0 ] ), 7629 entity( "⋄", [ 8900, 0 ] ), 7630 entity( "⋄", [ 8900, 0 ] ), 7631 entity( "♦", [ 9830, 0 ] ), 7632 entity( "♦", [ 9830, 0 ] ), 7633 entity( "¨", [ 168, 0 ] ), 7634 entity( "ϝ", [ 989, 0 ] ), 7635 entity( "⋲", [ 8946, 0 ] ), 7636 entity( "÷", [ 247, 0 ] ), 7637 entity( "÷", [ 247, 0 ] ), 7638 entity( "⋇", [ 8903, 0 ] ), 7639 entity( "⋇", [ 8903, 0 ] ), 7640 entity( "ђ", [ 1106, 0 ] ), 7641 entity( "⌞", [ 8990, 0 ] ), 7642 entity( "⌍", [ 8973, 0 ] ), 7643 entity( "$", [ 36, 0 ] ), 7644 entity( "𝕕", [ 120149, 0 ] ), 7645 entity( 
"˙", [ 729, 0 ] ), 7646 entity( "≐", [ 8784, 0 ] ), 7647 entity( "≑", [ 8785, 0 ] ), 7648 entity( "∸", [ 8760, 0 ] ), 7649 entity( "∔", [ 8724, 0 ] ), 7650 entity( "⊡", [ 8865, 0 ] ), 7651 entity( "⌆", [ 8966, 0 ] ), 7652 entity( "↓", [ 8595, 0 ] ), 7653 entity( "⇊", [ 8650, 0 ] ), 7654 entity( "⇃", [ 8643, 0 ] ), 7655 entity( "⇂", [ 8642, 0 ] ), 7656 entity( "⤐", [ 10512, 0 ] ), 7657 entity( "⌟", [ 8991, 0 ] ), 7658 entity( "⌌", [ 8972, 0 ] ), 7659 entity( "𝒹", [ 119993, 0 ] ), 7660 entity( "ѕ", [ 1109, 0 ] ), 7661 entity( "⧶", [ 10742, 0 ] ), 7662 entity( "đ", [ 273, 0 ] ), 7663 entity( "⋱", [ 8945, 0 ] ), 7664 entity( "▿", [ 9663, 0 ] ), 7665 entity( "▾", [ 9662, 0 ] ), 7666 entity( "⇵", [ 8693, 0 ] ), 7667 entity( "⥯", [ 10607, 0 ] ), 7668 entity( "⦦", [ 10662, 0 ] ), 7669 entity( "џ", [ 1119, 0 ] ), 7670 entity( "⟿", [ 10239, 0 ] ), 7671 entity( "⩷", [ 10871, 0 ] ), 7672 entity( "≑", [ 8785, 0 ] ), 7673 entity( "é", [ 233, 0 ] ), 7674 entity( "⩮", [ 10862, 0 ] ), 7675 entity( "ě", [ 283, 0 ] ), 7676 entity( "≖", [ 8790, 0 ] ), 7677 entity( "ê", [ 234, 0 ] ), 7678 entity( "≕", [ 8789, 0 ] ), 7679 entity( "э", [ 1101, 0 ] ), 7680 entity( "ė", [ 279, 0 ] ), 7681 entity( "ⅇ", [ 8519, 0 ] ), 7682 entity( "≒", [ 8786, 0 ] ), 7683 entity( "𝔢", [ 120098, 0 ] ), 7684 entity( "⪚", [ 10906, 0 ] ), 7685 entity( "è", [ 232, 0 ] ), 7686 entity( "⪖", [ 10902, 0 ] ), 7687 entity( "⪘", [ 10904, 0 ] ), 7688 entity( "⪙", [ 10905, 0 ] ), 7689 entity( "⏧", [ 9191, 0 ] ), 7690 entity( "ℓ", [ 8467, 0 ] ), 7691 entity( "⪕", [ 10901, 0 ] ), 7692 entity( "⪗", [ 10903, 0 ] ), 7693 entity( "ē", [ 275, 0 ] ), 7694 entity( "∅", [ 8709, 0 ] ), 7695 entity( "∅", [ 8709, 0 ] ), 7696 entity( "∅", [ 8709, 0 ] ), 7697 entity( " ", [ 8196, 0 ] ), 7698 entity( " ", [ 8197, 0 ] ), 7699 entity( " ", [ 8195, 0 ] ), 7700 entity( "ŋ", [ 331, 0 ] ), 7701 entity( " ", [ 8194, 0 ] ), 7702 entity( "ę", [ 281, 0 ] ), 7703 entity( "𝕖", [ 120150, 0 ] ), 7704 entity( "⋕", [ 8917, 0 ] ), 7705 entity( "⧣", [ 
10723, 0 ] ), 7706 entity( "⩱", [ 10865, 0 ] ), 7707 entity( "ε", [ 949, 0 ] ), 7708 entity( "ε", [ 949, 0 ] ), 7709 entity( "ϵ", [ 1013, 0 ] ), 7710 entity( "≖", [ 8790, 0 ] ), 7711 entity( "≕", [ 8789, 0 ] ), 7712 entity( "≂", [ 8770, 0 ] ), 7713 entity( "⪖", [ 10902, 0 ] ), 7714 entity( "⪕", [ 10901, 0 ] ), 7715 entity( "=", [ 61, 0 ] ), 7716 entity( "≟", [ 8799, 0 ] ), 7717 entity( "≡", [ 8801, 0 ] ), 7718 entity( "⩸", [ 10872, 0 ] ), 7719 entity( "⧥", [ 10725, 0 ] ), 7720 entity( "≓", [ 8787, 0 ] ), 7721 entity( "⥱", [ 10609, 0 ] ), 7722 entity( "ℯ", [ 8495, 0 ] ), 7723 entity( "≐", [ 8784, 0 ] ), 7724 entity( "≂", [ 8770, 0 ] ), 7725 entity( "η", [ 951, 0 ] ), 7726 entity( "ð", [ 240, 0 ] ), 7727 entity( "ë", [ 235, 0 ] ), 7728 entity( "€", [ 8364, 0 ] ), 7729 entity( "!", [ 33, 0 ] ), 7730 entity( "∃", [ 8707, 0 ] ), 7731 entity( "ℰ", [ 8496, 0 ] ), 7732 entity( "ⅇ", [ 8519, 0 ] ), 7733 entity( "≒", [ 8786, 0 ] ), 7734 entity( "ф", [ 1092, 0 ] ), 7735 entity( "♀", [ 9792, 0 ] ), 7736 entity( "ffi", [ 64259, 0 ] ), 7737 entity( "ff", [ 64256, 0 ] ), 7738 entity( "ffl", [ 64260, 0 ] ), 7739 entity( "𝔣", [ 120099, 0 ] ), 7740 entity( "fi", [ 64257, 0 ] ), 7741 entity( "fj", [ 102, 106 ] ), 7742 entity( "♭", [ 9837, 0 ] ), 7743 entity( "fl", [ 64258, 0 ] ), 7744 entity( "▱", [ 9649, 0 ] ), 7745 entity( "ƒ", [ 402, 0 ] ), 7746 entity( "𝕗", [ 120151, 0 ] ), 7747 entity( "∀", [ 8704, 0 ] ), 7748 entity( "⋔", [ 8916, 0 ] ), 7749 entity( "⫙", [ 10969, 0 ] ), 7750 entity( "⨍", [ 10765, 0 ] ), 7751 entity( "½", [ 189, 0 ] ), 7752 entity( "½", [ 189, 0 ] ), 7753 entity( "⅓", [ 8531, 0 ] ), 7754 entity( "¼", [ 188, 0 ] ), 7755 entity( "¼", [ 188, 0 ] ), 7756 entity( "⅕", [ 8533, 0 ] ), 7757 entity( "⅙", [ 8537, 0 ] ), 7758 entity( "⅛", [ 8539, 0 ] ), 7759 entity( "⅔", [ 8532, 0 ] ), 7760 entity( "⅖", [ 8534, 0 ] ), 7761 entity( "¾", [ 190, 0 ] ), 7762 entity( "¾", [ 190, 0 ] ), 7763 entity( "⅗", [ 8535, 0 ] ), 7764 entity( "⅜", [ 8540, 0 ] ), 7765 entity( "⅘", [ 8536, 0 
] ), 7766 entity( "⅚", [ 8538, 0 ] ), 7767 entity( "⅝", [ 8541, 0 ] ), 7768 entity( "⅞", [ 8542, 0 ] ), 7769 entity( "⁄", [ 8260, 0 ] ), 7770 entity( "⌢", [ 8994, 0 ] ), 7771 entity( "𝒻", [ 119995, 0 ] ), 7772 entity( "≧", [ 8807, 0 ] ), 7773 entity( "⪌", [ 10892, 0 ] ), 7774 entity( "ǵ", [ 501, 0 ] ), 7775 entity( "γ", [ 947, 0 ] ), 7776 entity( "ϝ", [ 989, 0 ] ), 7777 entity( "⪆", [ 10886, 0 ] ), 7778 entity( "ğ", [ 287, 0 ] ), 7779 entity( "ĝ", [ 285, 0 ] ), 7780 entity( "г", [ 1075, 0 ] ), 7781 entity( "ġ", [ 289, 0 ] ), 7782 entity( "≥", [ 8805, 0 ] ), 7783 entity( "⋛", [ 8923, 0 ] ), 7784 entity( "≥", [ 8805, 0 ] ), 7785 entity( "≧", [ 8807, 0 ] ), 7786 entity( "⩾", [ 10878, 0 ] ), 7787 entity( "⩾", [ 10878, 0 ] ), 7788 entity( "⪩", [ 10921, 0 ] ), 7789 entity( "⪀", [ 10880, 0 ] ), 7790 entity( "⪂", [ 10882, 0 ] ), 7791 entity( "⪄", [ 10884, 0 ] ), 7792 entity( "⋛︀", [ 8923, 65024 ] ), 7793 entity( "⪔", [ 10900, 0 ] ), 7794 entity( "𝔤", [ 120100, 0 ] ), 7795 entity( "≫", [ 8811, 0 ] ), 7796 entity( "⋙", [ 8921, 0 ] ), 7797 entity( "ℷ", [ 8503, 0 ] ), 7798 entity( "ѓ", [ 1107, 0 ] ), 7799 entity( "≷", [ 8823, 0 ] ), 7800 entity( "⪒", [ 10898, 0 ] ), 7801 entity( "⪥", [ 10917, 0 ] ), 7802 entity( "⪤", [ 10916, 0 ] ), 7803 entity( "≩", [ 8809, 0 ] ), 7804 entity( "⪊", [ 10890, 0 ] ), 7805 entity( "⪊", [ 10890, 0 ] ), 7806 entity( "⪈", [ 10888, 0 ] ), 7807 entity( "⪈", [ 10888, 0 ] ), 7808 entity( "≩", [ 8809, 0 ] ), 7809 entity( "⋧", [ 8935, 0 ] ), 7810 entity( "𝕘", [ 120152, 0 ] ), 7811 entity( "`", [ 96, 0 ] ), 7812 entity( "ℊ", [ 8458, 0 ] ), 7813 entity( "≳", [ 8819, 0 ] ), 7814 entity( "⪎", [ 10894, 0 ] ), 7815 entity( "⪐", [ 10896, 0 ] ), 7816 entity( ">", [ 62, 0 ] ), 7817 entity( "⪧", [ 10919, 0 ] ), 7818 entity( "⩺", [ 10874, 0 ] ), 7819 entity( "⋗", [ 8919, 0 ] ), 7820 entity( "⦕", [ 10645, 0 ] ), 7821 entity( "⩼", [ 10876, 0 ] ), 7822 entity( "⪆", [ 10886, 0 ] ), 7823 entity( "⥸", [ 10616, 0 ] ), 7824 entity( "⋗", [ 8919, 0 ] ), 7825 entity( "⋛", [ 
8923, 0 ] ), 7826 entity( "⪌", [ 10892, 0 ] ), 7827 entity( "≷", [ 8823, 0 ] ), 7828 entity( "≳", [ 8819, 0 ] ), 7829 entity( "≩︀", [ 8809, 65024 ] ), 7830 entity( "≩︀", [ 8809, 65024 ] ), 7831 entity( "⇔", [ 8660, 0 ] ), 7832 entity( " ", [ 8202, 0 ] ), 7833 entity( "½", [ 189, 0 ] ), 7834 entity( "ℋ", [ 8459, 0 ] ), 7835 entity( "ъ", [ 1098, 0 ] ), 7836 entity( "↔", [ 8596, 0 ] ), 7837 entity( "⥈", [ 10568, 0 ] ), 7838 entity( "↭", [ 8621, 0 ] ), 7839 entity( "ℏ", [ 8463, 0 ] ), 7840 entity( "ĥ", [ 293, 0 ] ), 7841 entity( "♥", [ 9829, 0 ] ), 7842 entity( "♥", [ 9829, 0 ] ), 7843 entity( "…", [ 8230, 0 ] ), 7844 entity( "⊹", [ 8889, 0 ] ), 7845 entity( "𝔥", [ 120101, 0 ] ), 7846 entity( "⤥", [ 10533, 0 ] ), 7847 entity( "⤦", [ 10534, 0 ] ), 7848 entity( "⇿", [ 8703, 0 ] ), 7849 entity( "∻", [ 8763, 0 ] ), 7850 entity( "↩", [ 8617, 0 ] ), 7851 entity( "↪", [ 8618, 0 ] ), 7852 entity( "𝕙", [ 120153, 0 ] ), 7853 entity( "―", [ 8213, 0 ] ), 7854 entity( "𝒽", [ 119997, 0 ] ), 7855 entity( "ℏ", [ 8463, 0 ] ), 7856 entity( "ħ", [ 295, 0 ] ), 7857 entity( "⁃", [ 8259, 0 ] ), 7858 entity( "‐", [ 8208, 0 ] ), 7859 entity( "í", [ 237, 0 ] ), 7860 entity( "⁣", [ 8291, 0 ] ), 7861 entity( "î", [ 238, 0 ] ), 7862 entity( "и", [ 1080, 0 ] ), 7863 entity( "е", [ 1077, 0 ] ), 7864 entity( "¡", [ 161, 0 ] ), 7865 entity( "⇔", [ 8660, 0 ] ), 7866 entity( "𝔦", [ 120102, 0 ] ), 7867 entity( "ì", [ 236, 0 ] ), 7868 entity( "ⅈ", [ 8520, 0 ] ), 7869 entity( "⨌", [ 10764, 0 ] ), 7870 entity( "∭", [ 8749, 0 ] ), 7871 entity( "⧜", [ 10716, 0 ] ), 7872 entity( "℩", [ 8489, 0 ] ), 7873 entity( "ij", [ 307, 0 ] ), 7874 entity( "ī", [ 299, 0 ] ), 7875 entity( "ℑ", [ 8465, 0 ] ), 7876 entity( "ℐ", [ 8464, 0 ] ), 7877 entity( "ℑ", [ 8465, 0 ] ), 7878 entity( "ı", [ 305, 0 ] ), 7879 entity( "⊷", [ 8887, 0 ] ), 7880 entity( "Ƶ", [ 437, 0 ] ), 7881 entity( "∈", [ 8712, 0 ] ), 7882 entity( "℅", [ 8453, 0 ] ), 7883 entity( "∞", [ 8734, 0 ] ), 7884 entity( "⧝", [ 10717, 0 ] ), 7885 entity( "ı", [ 305, 
0 ] ), 7886 entity( "∫", [ 8747, 0 ] ), 7887 entity( "⊺", [ 8890, 0 ] ), 7888 entity( "ℤ", [ 8484, 0 ] ), 7889 entity( "⊺", [ 8890, 0 ] ), 7890 entity( "⨗", [ 10775, 0 ] ), 7891 entity( "⨼", [ 10812, 0 ] ), 7892 entity( "ё", [ 1105, 0 ] ), 7893 entity( "į", [ 303, 0 ] ), 7894 entity( "𝕚", [ 120154, 0 ] ), 7895 entity( "ι", [ 953, 0 ] ), 7896 entity( "⨼", [ 10812, 0 ] ), 7897 entity( "¿", [ 191, 0 ] ), 7898 entity( "𝒾", [ 119998, 0 ] ), 7899 entity( "∈", [ 8712, 0 ] ), 7900 entity( "⋹", [ 8953, 0 ] ), 7901 entity( "⋵", [ 8949, 0 ] ), 7902 entity( "⋴", [ 8948, 0 ] ), 7903 entity( "⋳", [ 8947, 0 ] ), 7904 entity( "∈", [ 8712, 0 ] ), 7905 entity( "⁢", [ 8290, 0 ] ), 7906 entity( "ĩ", [ 297, 0 ] ), 7907 entity( "і", [ 1110, 0 ] ), 7908 entity( "ï", [ 239, 0 ] ), 7909 entity( "ĵ", [ 309, 0 ] ), 7910 entity( "й", [ 1081, 0 ] ), 7911 entity( "𝔧", [ 120103, 0 ] ), 7912 entity( "ȷ", [ 567, 0 ] ), 7913 entity( "𝕛", [ 120155, 0 ] ), 7914 entity( "𝒿", [ 119999, 0 ] ), 7915 entity( "ј", [ 1112, 0 ] ), 7916 entity( "є", [ 1108, 0 ] ), 7917 entity( "κ", [ 954, 0 ] ), 7918 entity( "ϰ", [ 1008, 0 ] ), 7919 entity( "ķ", [ 311, 0 ] ), 7920 entity( "к", [ 1082, 0 ] ), 7921 entity( "𝔨", [ 120104, 0 ] ), 7922 entity( "ĸ", [ 312, 0 ] ), 7923 entity( "х", [ 1093, 0 ] ), 7924 entity( "ќ", [ 1116, 0 ] ), 7925 entity( "𝕜", [ 120156, 0 ] ), 7926 entity( "𝓀", [ 120000, 0 ] ), 7927 entity( "⇚", [ 8666, 0 ] ), 7928 entity( "⇐", [ 8656, 0 ] ), 7929 entity( "⤛", [ 10523, 0 ] ), 7930 entity( "⤎", [ 10510, 0 ] ), 7931 entity( "≦", [ 8806, 0 ] ), 7932 entity( "⪋", [ 10891, 0 ] ), 7933 entity( "⥢", [ 10594, 0 ] ), 7934 entity( "ĺ", [ 314, 0 ] ), 7935 entity( "⦴", [ 10676, 0 ] ), 7936 entity( "ℒ", [ 8466, 0 ] ), 7937 entity( "λ", [ 955, 0 ] ), 7938 entity( "⟨", [ 10216, 0 ] ), 7939 entity( "⦑", [ 10641, 0 ] ), 7940 entity( "⟨", [ 10216, 0 ] ), 7941 entity( "⪅", [ 10885, 0 ] ), 7942 entity( "«", [ 171, 0 ] ), 7943 entity( "←", [ 8592, 0 ] ), 7944 entity( "⇤", [ 8676, 0 ] ), 7945 entity( "⤟", [ 10527, 0 ] 
), 7946 entity( "⤝", [ 10525, 0 ] ), 7947 entity( "↩", [ 8617, 0 ] ), 7948 entity( "↫", [ 8619, 0 ] ), 7949 entity( "⤹", [ 10553, 0 ] ), 7950 entity( "⥳", [ 10611, 0 ] ), 7951 entity( "↢", [ 8610, 0 ] ), 7952 entity( "⪫", [ 10923, 0 ] ), 7953 entity( "⤙", [ 10521, 0 ] ), 7954 entity( "⪭", [ 10925, 0 ] ), 7955 entity( "⪭︀", [ 10925, 65024 ] ), 7956 entity( "⤌", [ 10508, 0 ] ), 7957 entity( "❲", [ 10098, 0 ] ), 7958 entity( "{", [ 123, 0 ] ), 7959 entity( "[", [ 91, 0 ] ), 7960 entity( "⦋", [ 10635, 0 ] ), 7961 entity( "⦏", [ 10639, 0 ] ), 7962 entity( "⦍", [ 10637, 0 ] ), 7963 entity( "ľ", [ 318, 0 ] ), 7964 entity( "ļ", [ 316, 0 ] ), 7965 entity( "⌈", [ 8968, 0 ] ), 7966 entity( "{", [ 123, 0 ] ), 7967 entity( "л", [ 1083, 0 ] ), 7968 entity( "⤶", [ 10550, 0 ] ), 7969 entity( "“", [ 8220, 0 ] ), 7970 entity( "„", [ 8222, 0 ] ), 7971 entity( "⥧", [ 10599, 0 ] ), 7972 entity( "⥋", [ 10571, 0 ] ), 7973 entity( "↲", [ 8626, 0 ] ), 7974 entity( "≤", [ 8804, 0 ] ), 7975 entity( "←", [ 8592, 0 ] ), 7976 entity( "↢", [ 8610, 0 ] ), 7977 entity( "↽", [ 8637, 0 ] ), 7978 entity( "↼", [ 8636, 0 ] ), 7979 entity( "⇇", [ 8647, 0 ] ), 7980 entity( "↔", [ 8596, 0 ] ), 7981 entity( "⇆", [ 8646, 0 ] ), 7982 entity( "⇋", [ 8651, 0 ] ), 7983 entity( "↭", [ 8621, 0 ] ), 7984 entity( "⋋", [ 8907, 0 ] ), 7985 entity( "⋚", [ 8922, 0 ] ), 7986 entity( "≤", [ 8804, 0 ] ), 7987 entity( "≦", [ 8806, 0 ] ), 7988 entity( "⩽", [ 10877, 0 ] ), 7989 entity( "⩽", [ 10877, 0 ] ), 7990 entity( "⪨", [ 10920, 0 ] ), 7991 entity( "⩿", [ 10879, 0 ] ), 7992 entity( "⪁", [ 10881, 0 ] ), 7993 entity( "⪃", [ 10883, 0 ] ), 7994 entity( "⋚︀", [ 8922, 65024 ] ), 7995 entity( "⪓", [ 10899, 0 ] ), 7996 entity( "⪅", [ 10885, 0 ] ), 7997 entity( "⋖", [ 8918, 0 ] ), 7998 entity( "⋚", [ 8922, 0 ] ), 7999 entity( "⪋", [ 10891, 0 ] ), 8000 entity( "≶", [ 8822, 0 ] ), 8001 entity( "≲", [ 8818, 0 ] ), 8002 entity( "⥼", [ 10620, 0 ] ), 8003 entity( "⌊", [ 8970, 0 ] ), 8004 entity( "𝔩", [ 120105, 0 ] ), 8005 entity( "≶", 
[ 8822, 0 ] ), 8006 entity( "⪑", [ 10897, 0 ] ), 8007 entity( "↽", [ 8637, 0 ] ), 8008 entity( "↼", [ 8636, 0 ] ), 8009 entity( "⥪", [ 10602, 0 ] ), 8010 entity( "▄", [ 9604, 0 ] ), 8011 entity( "љ", [ 1113, 0 ] ), 8012 entity( "≪", [ 8810, 0 ] ), 8013 entity( "⇇", [ 8647, 0 ] ), 8014 entity( "⌞", [ 8990, 0 ] ), 8015 entity( "⥫", [ 10603, 0 ] ), 8016 entity( "◺", [ 9722, 0 ] ), 8017 entity( "ŀ", [ 320, 0 ] ), 8018 entity( "⎰", [ 9136, 0 ] ), 8019 entity( "⎰", [ 9136, 0 ] ), 8020 entity( "≨", [ 8808, 0 ] ), 8021 entity( "⪉", [ 10889, 0 ] ), 8022 entity( "⪉", [ 10889, 0 ] ), 8023 entity( "⪇", [ 10887, 0 ] ), 8024 entity( "⪇", [ 10887, 0 ] ), 8025 entity( "≨", [ 8808, 0 ] ), 8026 entity( "⋦", [ 8934, 0 ] ), 8027 entity( "⟬", [ 10220, 0 ] ), 8028 entity( "⇽", [ 8701, 0 ] ), 8029 entity( "⟦", [ 10214, 0 ] ), 8030 entity( "⟵", [ 10229, 0 ] ), 8031 entity( "⟷", [ 10231, 0 ] ), 8032 entity( "⟼", [ 10236, 0 ] ), 8033 entity( "⟶", [ 10230, 0 ] ), 8034 entity( "↫", [ 8619, 0 ] ), 8035 entity( "↬", [ 8620, 0 ] ), 8036 entity( "⦅", [ 10629, 0 ] ), 8037 entity( "𝕝", [ 120157, 0 ] ), 8038 entity( "⨭", [ 10797, 0 ] ), 8039 entity( "⨴", [ 10804, 0 ] ), 8040 entity( "∗", [ 8727, 0 ] ), 8041 entity( "_", [ 95, 0 ] ), 8042 entity( "◊", [ 9674, 0 ] ), 8043 entity( "◊", [ 9674, 0 ] ), 8044 entity( "⧫", [ 10731, 0 ] ), 8045 entity( "(", [ 40, 0 ] ), 8046 entity( "⦓", [ 10643, 0 ] ), 8047 entity( "⇆", [ 8646, 0 ] ), 8048 entity( "⌟", [ 8991, 0 ] ), 8049 entity( "⇋", [ 8651, 0 ] ), 8050 entity( "⥭", [ 10605, 0 ] ), 8051 entity( "‎", [ 8206, 0 ] ), 8052 entity( "⊿", [ 8895, 0 ] ), 8053 entity( "‹", [ 8249, 0 ] ), 8054 entity( "𝓁", [ 120001, 0 ] ), 8055 entity( "↰", [ 8624, 0 ] ), 8056 entity( "≲", [ 8818, 0 ] ), 8057 entity( "⪍", [ 10893, 0 ] ), 8058 entity( "⪏", [ 10895, 0 ] ), 8059 entity( "[", [ 91, 0 ] ), 8060 entity( "‘", [ 8216, 0 ] ), 8061 entity( "‚", [ 8218, 0 ] ), 8062 entity( "ł", [ 322, 0 ] ), 8063 entity( "<", [ 60, 0 ] ), 8064 entity( "⪦", [ 10918, 0 ] ), 8065 entity( "⩹", [ 
10873, 0 ] ), 8066 entity( "⋖", [ 8918, 0 ] ), 8067 entity( "⋋", [ 8907, 0 ] ), 8068 entity( "⋉", [ 8905, 0 ] ), 8069 entity( "⥶", [ 10614, 0 ] ), 8070 entity( "⩻", [ 10875, 0 ] ), 8071 entity( "⦖", [ 10646, 0 ] ), 8072 entity( "◃", [ 9667, 0 ] ), 8073 entity( "⊴", [ 8884, 0 ] ), 8074 entity( "◂", [ 9666, 0 ] ), 8075 entity( "⥊", [ 10570, 0 ] ), 8076 entity( "⥦", [ 10598, 0 ] ), 8077 entity( "≨︀", [ 8808, 65024 ] ), 8078 entity( "≨︀", [ 8808, 65024 ] ), 8079 entity( "∺", [ 8762, 0 ] ), 8080 entity( "¯", [ 175, 0 ] ), 8081 entity( "♂", [ 9794, 0 ] ), 8082 entity( "✠", [ 10016, 0 ] ), 8083 entity( "✠", [ 10016, 0 ] ), 8084 entity( "↦", [ 8614, 0 ] ), 8085 entity( "↦", [ 8614, 0 ] ), 8086 entity( "↧", [ 8615, 0 ] ), 8087 entity( "↤", [ 8612, 0 ] ), 8088 entity( "↥", [ 8613, 0 ] ), 8089 entity( "▮", [ 9646, 0 ] ), 8090 entity( "⨩", [ 10793, 0 ] ), 8091 entity( "м", [ 1084, 0 ] ), 8092 entity( "—", [ 8212, 0 ] ), 8093 entity( "∡", [ 8737, 0 ] ), 8094 entity( "𝔪", [ 120106, 0 ] ), 8095 entity( "℧", [ 8487, 0 ] ), 8096 entity( "µ", [ 181, 0 ] ), 8097 entity( "∣", [ 8739, 0 ] ), 8098 entity( "*", [ 42, 0 ] ), 8099 entity( "⫰", [ 10992, 0 ] ), 8100 entity( "·", [ 183, 0 ] ), 8101 entity( "−", [ 8722, 0 ] ), 8102 entity( "⊟", [ 8863, 0 ] ), 8103 entity( "∸", [ 8760, 0 ] ), 8104 entity( "⨪", [ 10794, 0 ] ), 8105 entity( "⫛", [ 10971, 0 ] ), 8106 entity( "…", [ 8230, 0 ] ), 8107 entity( "∓", [ 8723, 0 ] ), 8108 entity( "⊧", [ 8871, 0 ] ), 8109 entity( "𝕞", [ 120158, 0 ] ), 8110 entity( "∓", [ 8723, 0 ] ), 8111 entity( "𝓂", [ 120002, 0 ] ), 8112 entity( "∾", [ 8766, 0 ] ), 8113 entity( "μ", [ 956, 0 ] ), 8114 entity( "⊸", [ 8888, 0 ] ), 8115 entity( "⊸", [ 8888, 0 ] ), 8116 entity( "⋙̸", [ 8921, 824 ] ), 8117 entity( "≫⃒", [ 8811, 8402 ] ), 8118 entity( "≫̸", [ 8811, 824 ] ), 8119 entity( "⇍", [ 8653, 0 ] ), 8120 entity( "⇎", [ 8654, 0 ] ), 8121 entity( "⋘̸", [ 8920, 824 ] ), 8122 entity( "≪⃒", [ 8810, 8402 ] ), 8123 entity( "≪̸", [ 8810, 824 ] ), 8124 entity( "⇏", [ 8655, 0 ] 
), 8125 entity( "⊯", [ 8879, 0 ] ), 8126 entity( "⊮", [ 8878, 0 ] ), 8127 entity( "∇", [ 8711, 0 ] ), 8128 entity( "ń", [ 324, 0 ] ), 8129 entity( "∠⃒", [ 8736, 8402 ] ), 8130 entity( "≉", [ 8777, 0 ] ), 8131 entity( "⩰̸", [ 10864, 824 ] ), 8132 entity( "≋̸", [ 8779, 824 ] ), 8133 entity( "ʼn", [ 329, 0 ] ), 8134 entity( "≉", [ 8777, 0 ] ), 8135 entity( "♮", [ 9838, 0 ] ), 8136 entity( "♮", [ 9838, 0 ] ), 8137 entity( "ℕ", [ 8469, 0 ] ), 8138 entity( " ", [ 160, 0 ] ), 8139 entity( "≎̸", [ 8782, 824 ] ), 8140 entity( "≏̸", [ 8783, 824 ] ), 8141 entity( "⩃", [ 10819, 0 ] ), 8142 entity( "ň", [ 328, 0 ] ), 8143 entity( "ņ", [ 326, 0 ] ), 8144 entity( "≇", [ 8775, 0 ] ), 8145 entity( "⩭̸", [ 10861, 824 ] ), 8146 entity( "⩂", [ 10818, 0 ] ), 8147 entity( "н", [ 1085, 0 ] ), 8148 entity( "–", [ 8211, 0 ] ), 8149 entity( "≠", [ 8800, 0 ] ), 8150 entity( "⇗", [ 8663, 0 ] ), 8151 entity( "⤤", [ 10532, 0 ] ), 8152 entity( "↗", [ 8599, 0 ] ), 8153 entity( "↗", [ 8599, 0 ] ), 8154 entity( "≐̸", [ 8784, 824 ] ), 8155 entity( "≢", [ 8802, 0 ] ), 8156 entity( "⤨", [ 10536, 0 ] ), 8157 entity( "≂̸", [ 8770, 824 ] ), 8158 entity( "∄", [ 8708, 0 ] ), 8159 entity( "∄", [ 8708, 0 ] ), 8160 entity( "𝔫", [ 120107, 0 ] ), 8161 entity( "≧̸", [ 8807, 824 ] ), 8162 entity( "≱", [ 8817, 0 ] ), 8163 entity( "≱", [ 8817, 0 ] ), 8164 entity( "≧̸", [ 8807, 824 ] ), 8165 entity( "⩾̸", [ 10878, 824 ] ), 8166 entity( "⩾̸", [ 10878, 824 ] ), 8167 entity( "≵", [ 8821, 0 ] ), 8168 entity( "≯", [ 8815, 0 ] ), 8169 entity( "≯", [ 8815, 0 ] ), 8170 entity( "⇎", [ 8654, 0 ] ), 8171 entity( "↮", [ 8622, 0 ] ), 8172 entity( "⫲", [ 10994, 0 ] ), 8173 entity( "∋", [ 8715, 0 ] ), 8174 entity( "⋼", [ 8956, 0 ] ), 8175 entity( "⋺", [ 8954, 0 ] ), 8176 entity( "∋", [ 8715, 0 ] ), 8177 entity( "њ", [ 1114, 0 ] ), 8178 entity( "⇍", [ 8653, 0 ] ), 8179 entity( "≦̸", [ 8806, 824 ] ), 8180 entity( "↚", [ 8602, 0 ] ), 8181 entity( "‥", [ 8229, 0 ] ), 8182 entity( "≰", [ 8816, 0 ] ), 8183 entity( "↚", [ 8602, 0 ] ), 
8184 entity( "↮", [ 8622, 0 ] ), 8185 entity( "≰", [ 8816, 0 ] ), 8186 entity( "≦̸", [ 8806, 824 ] ), 8187 entity( "⩽̸", [ 10877, 824 ] ), 8188 entity( "⩽̸", [ 10877, 824 ] ), 8189 entity( "≮", [ 8814, 0 ] ), 8190 entity( "≴", [ 8820, 0 ] ), 8191 entity( "≮", [ 8814, 0 ] ), 8192 entity( "⋪", [ 8938, 0 ] ), 8193 entity( "⋬", [ 8940, 0 ] ), 8194 entity( "∤", [ 8740, 0 ] ), 8195 entity( "𝕟", [ 120159, 0 ] ), 8196 entity( "¬", [ 172, 0 ] ), 8197 entity( "∉", [ 8713, 0 ] ), 8198 entity( "⋹̸", [ 8953, 824 ] ), 8199 entity( "⋵̸", [ 8949, 824 ] ), 8200 entity( "∉", [ 8713, 0 ] ), 8201 entity( "⋷", [ 8951, 0 ] ), 8202 entity( "⋶", [ 8950, 0 ] ), 8203 entity( "∌", [ 8716, 0 ] ), 8204 entity( "∌", [ 8716, 0 ] ), 8205 entity( "⋾", [ 8958, 0 ] ), 8206 entity( "⋽", [ 8957, 0 ] ), 8207 entity( "∦", [ 8742, 0 ] ), 8208 entity( "∦", [ 8742, 0 ] ), 8209 entity( "⫽⃥", [ 11005, 8421 ] ), 8210 entity( "∂̸", [ 8706, 824 ] ), 8211 entity( "⨔", [ 10772, 0 ] ), 8212 entity( "⊀", [ 8832, 0 ] ), 8213 entity( "⋠", [ 8928, 0 ] ), 8214 entity( "⪯̸", [ 10927, 824 ] ), 8215 entity( "⊀", [ 8832, 0 ] ), 8216 entity( "⪯̸", [ 10927, 824 ] ), 8217 entity( "⇏", [ 8655, 0 ] ), 8218 entity( "↛", [ 8603, 0 ] ), 8219 entity( "⤳̸", [ 10547, 824 ] ), 8220 entity( "↝̸", [ 8605, 824 ] ), 8221 entity( "↛", [ 8603, 0 ] ), 8222 entity( "⋫", [ 8939, 0 ] ), 8223 entity( "⋭", [ 8941, 0 ] ), 8224 entity( "⊁", [ 8833, 0 ] ), 8225 entity( "⋡", [ 8929, 0 ] ), 8226 entity( "⪰̸", [ 10928, 824 ] ), 8227 entity( "𝓃", [ 120003, 0 ] ), 8228 entity( "∤", [ 8740, 0 ] ), 8229 entity( "∦", [ 8742, 0 ] ), 8230 entity( "≁", [ 8769, 0 ] ), 8231 entity( "≄", [ 8772, 0 ] ), 8232 entity( "≄", [ 8772, 0 ] ), 8233 entity( "∤", [ 8740, 0 ] ), 8234 entity( "∦", [ 8742, 0 ] ), 8235 entity( "⋢", [ 8930, 0 ] ), 8236 entity( "⋣", [ 8931, 0 ] ), 8237 entity( "⊄", [ 8836, 0 ] ), 8238 entity( "⫅̸", [ 10949, 824 ] ), 8239 entity( "⊈", [ 8840, 0 ] ), 8240 entity( "⊂⃒", [ 8834, 8402 ] ), 8241 entity( "⊈", [ 8840, 0 ] ), 8242 entity( "⫅̸", [ 10949, 
824 ] ), 8243 entity( "⊁", [ 8833, 0 ] ), 8244 entity( "⪰̸", [ 10928, 824 ] ), 8245 entity( "⊅", [ 8837, 0 ] ), 8246 entity( "⫆̸", [ 10950, 824 ] ), 8247 entity( "⊉", [ 8841, 0 ] ), 8248 entity( "⊃⃒", [ 8835, 8402 ] ), 8249 entity( "⊉", [ 8841, 0 ] ), 8250 entity( "⫆̸", [ 10950, 824 ] ), 8251 entity( "≹", [ 8825, 0 ] ), 8252 entity( "ñ", [ 241, 0 ] ), 8253 entity( "≸", [ 8824, 0 ] ), 8254 entity( "⋪", [ 8938, 0 ] ), 8255 entity( "⋬", [ 8940, 0 ] ), 8256 entity( "⋫", [ 8939, 0 ] ), 8257 entity( "⋭", [ 8941, 0 ] ), 8258 entity( "ν", [ 957, 0 ] ), 8259 entity( "#", [ 35, 0 ] ), 8260 entity( "№", [ 8470, 0 ] ), 8261 entity( " ", [ 8199, 0 ] ), 8262 entity( "⊭", [ 8877, 0 ] ), 8263 entity( "⤄", [ 10500, 0 ] ), 8264 entity( "≍⃒", [ 8781, 8402 ] ), 8265 entity( "⊬", [ 8876, 0 ] ), 8266 entity( "≥⃒", [ 8805, 8402 ] ), 8267 entity( ">⃒", [ 62, 8402 ] ), 8268 entity( "⧞", [ 10718, 0 ] ), 8269 entity( "⤂", [ 10498, 0 ] ), 8270 entity( "≤⃒", [ 8804, 8402 ] ), 8271 entity( "<⃒", [ 60, 8402 ] ), 8272 entity( "⊴⃒", [ 8884, 8402 ] ), 8273 entity( "⤃", [ 10499, 0 ] ), 8274 entity( "⊵⃒", [ 8885, 8402 ] ), 8275 entity( "∼⃒", [ 8764, 8402 ] ), 8276 entity( "⇖", [ 8662, 0 ] ), 8277 entity( "⤣", [ 10531, 0 ] ), 8278 entity( "↖", [ 8598, 0 ] ), 8279 entity( "↖", [ 8598, 0 ] ), 8280 entity( "⤧", [ 10535, 0 ] ), 8281 entity( "Ⓢ", [ 9416, 0 ] ), 8282 entity( "ó", [ 243, 0 ] ), 8283 entity( "⊛", [ 8859, 0 ] ), 8284 entity( "⊚", [ 8858, 0 ] ), 8285 entity( "ô", [ 244, 0 ] ), 8286 entity( "о", [ 1086, 0 ] ), 8287 entity( "⊝", [ 8861, 0 ] ), 8288 entity( "ő", [ 337, 0 ] ), 8289 entity( "⨸", [ 10808, 0 ] ), 8290 entity( "⊙", [ 8857, 0 ] ), 8291 entity( "⦼", [ 10684, 0 ] ), 8292 entity( "œ", [ 339, 0 ] ), 8293 entity( "⦿", [ 10687, 0 ] ), 8294 entity( "𝔬", [ 120108, 0 ] ), 8295 entity( "˛", [ 731, 0 ] ), 8296 entity( "ò", [ 242, 0 ] ), 8297 entity( "⧁", [ 10689, 0 ] ), 8298 entity( "⦵", [ 10677, 0 ] ), 8299 entity( "Ω", [ 937, 0 ] ), 8300 entity( "∮", [ 8750, 0 ] ), 8301 entity( "↺", [ 8634, 0 ] 
), 8302 entity( "⦾", [ 10686, 0 ] ), 8303 entity( "⦻", [ 10683, 0 ] ), 8304 entity( "‾", [ 8254, 0 ] ), 8305 entity( "⧀", [ 10688, 0 ] ), 8306 entity( "ō", [ 333, 0 ] ), 8307 entity( "ω", [ 969, 0 ] ), 8308 entity( "ο", [ 959, 0 ] ), 8309 entity( "⦶", [ 10678, 0 ] ), 8310 entity( "⊖", [ 8854, 0 ] ), 8311 entity( "𝕠", [ 120160, 0 ] ), 8312 entity( "⦷", [ 10679, 0 ] ), 8313 entity( "⦹", [ 10681, 0 ] ), 8314 entity( "⊕", [ 8853, 0 ] ), 8315 entity( "∨", [ 8744, 0 ] ), 8316 entity( "↻", [ 8635, 0 ] ), 8317 entity( "⩝", [ 10845, 0 ] ), 8318 entity( "ℴ", [ 8500, 0 ] ), 8319 entity( "ℴ", [ 8500, 0 ] ), 8320 entity( "ª", [ 170, 0 ] ), 8321 entity( "º", [ 186, 0 ] ), 8322 entity( "⊶", [ 8886, 0 ] ), 8323 entity( "⩖", [ 10838, 0 ] ), 8324 entity( "⩗", [ 10839, 0 ] ), 8325 entity( "⩛", [ 10843, 0 ] ), 8326 entity( "ℴ", [ 8500, 0 ] ), 8327 entity( "ø", [ 248, 0 ] ), 8328 entity( "⊘", [ 8856, 0 ] ), 8329 entity( "õ", [ 245, 0 ] ), 8330 entity( "⊗", [ 8855, 0 ] ), 8331 entity( "⨶", [ 10806, 0 ] ), 8332 entity( "ö", [ 246, 0 ] ), 8333 entity( "⌽", [ 9021, 0 ] ), 8334 entity( "∥", [ 8741, 0 ] ), 8335 entity( "¶", [ 182, 0 ] ), 8336 entity( "∥", [ 8741, 0 ] ), 8337 entity( "⫳", [ 10995, 0 ] ), 8338 entity( "⫽", [ 11005, 0 ] ), 8339 entity( "∂", [ 8706, 0 ] ), 8340 entity( "п", [ 1087, 0 ] ), 8341 entity( "%", [ 37, 0 ] ), 8342 entity( ".", [ 46, 0 ] ), 8343 entity( "‰", [ 8240, 0 ] ), 8344 entity( "⊥", [ 8869, 0 ] ), 8345 entity( "‱", [ 8241, 0 ] ), 8346 entity( "𝔭", [ 120109, 0 ] ), 8347 entity( "φ", [ 966, 0 ] ), 8348 entity( "ϕ", [ 981, 0 ] ), 8349 entity( "ℳ", [ 8499, 0 ] ), 8350 entity( "☎", [ 9742, 0 ] ), 8351 entity( "π", [ 960, 0 ] ), 8352 entity( "⋔", [ 8916, 0 ] ), 8353 entity( "ϖ", [ 982, 0 ] ), 8354 entity( "ℏ", [ 8463, 0 ] ), 8355 entity( "ℎ", [ 8462, 0 ] ), 8356 entity( "ℏ", [ 8463, 0 ] ), 8357 entity( "+", [ 43, 0 ] ), 8358 entity( "⨣", [ 10787, 0 ] ), 8359 entity( "⊞", [ 8862, 0 ] ), 8360 entity( "⨢", [ 10786, 0 ] ), 8361 entity( "∔", [ 8724, 0 ] ), 8362 entity( 
"⨥", [ 10789, 0 ] ), 8363 entity( "⩲", [ 10866, 0 ] ), 8364 entity( "±", [ 177, 0 ] ), 8365 entity( "⨦", [ 10790, 0 ] ), 8366 entity( "⨧", [ 10791, 0 ] ), 8367 entity( "±", [ 177, 0 ] ), 8368 entity( "⨕", [ 10773, 0 ] ), 8369 entity( "𝕡", [ 120161, 0 ] ), 8370 entity( "£", [ 163, 0 ] ), 8371 entity( "≺", [ 8826, 0 ] ), 8372 entity( "⪳", [ 10931, 0 ] ), 8373 entity( "⪷", [ 10935, 0 ] ), 8374 entity( "≼", [ 8828, 0 ] ), 8375 entity( "⪯", [ 10927, 0 ] ), 8376 entity( "≺", [ 8826, 0 ] ), 8377 entity( "⪷", [ 10935, 0 ] ), 8378 entity( "≼", [ 8828, 0 ] ), 8379 entity( "⪯", [ 10927, 0 ] ), 8380 entity( "⪹", [ 10937, 0 ] ), 8381 entity( "⪵", [ 10933, 0 ] ), 8382 entity( "⋨", [ 8936, 0 ] ), 8383 entity( "≾", [ 8830, 0 ] ), 8384 entity( "′", [ 8242, 0 ] ), 8385 entity( "ℙ", [ 8473, 0 ] ), 8386 entity( "⪵", [ 10933, 0 ] ), 8387 entity( "⪹", [ 10937, 0 ] ), 8388 entity( "⋨", [ 8936, 0 ] ), 8389 entity( "∏", [ 8719, 0 ] ), 8390 entity( "⌮", [ 9006, 0 ] ), 8391 entity( "⌒", [ 8978, 0 ] ), 8392 entity( "⌓", [ 8979, 0 ] ), 8393 entity( "∝", [ 8733, 0 ] ), 8394 entity( "∝", [ 8733, 0 ] ), 8395 entity( "≾", [ 8830, 0 ] ), 8396 entity( "⊰", [ 8880, 0 ] ), 8397 entity( "𝓅", [ 120005, 0 ] ), 8398 entity( "ψ", [ 968, 0 ] ), 8399 entity( " ", [ 8200, 0 ] ), 8400 entity( "𝔮", [ 120110, 0 ] ), 8401 entity( "⨌", [ 10764, 0 ] ), 8402 entity( "𝕢", [ 120162, 0 ] ), 8403 entity( "⁗", [ 8279, 0 ] ), 8404 entity( "𝓆", [ 120006, 0 ] ), 8405 entity( "ℍ", [ 8461, 0 ] ), 8406 entity( "⨖", [ 10774, 0 ] ), 8407 entity( "?", [ 63, 0 ] ), 8408 entity( "≟", [ 8799, 0 ] ), 8409 entity( """, [ 34, 0 ] ), 8410 entity( "⇛", [ 8667, 0 ] ), 8411 entity( "⇒", [ 8658, 0 ] ), 8412 entity( "⤜", [ 10524, 0 ] ), 8413 entity( "⤏", [ 10511, 0 ] ), 8414 entity( "⥤", [ 10596, 0 ] ), 8415 entity( "∽̱", [ 8765, 817 ] ), 8416 entity( "ŕ", [ 341, 0 ] ), 8417 entity( "√", [ 8730, 0 ] ), 8418 entity( "⦳", [ 10675, 0 ] ), 8419 entity( "⟩", [ 10217, 0 ] ), 8420 entity( "⦒", [ 10642, 0 ] ), 8421 entity( "⦥", [ 10661, 0 ] ), 8422 
entity( "⟩", [ 10217, 0 ] ), 8423 entity( "»", [ 187, 0 ] ), 8424 entity( "→", [ 8594, 0 ] ), 8425 entity( "⥵", [ 10613, 0 ] ), 8426 entity( "⇥", [ 8677, 0 ] ), 8427 entity( "⤠", [ 10528, 0 ] ), 8428 entity( "⤳", [ 10547, 0 ] ), 8429 entity( "⤞", [ 10526, 0 ] ), 8430 entity( "↪", [ 8618, 0 ] ), 8431 entity( "↬", [ 8620, 0 ] ), 8432 entity( "⥅", [ 10565, 0 ] ), 8433 entity( "⥴", [ 10612, 0 ] ), 8434 entity( "↣", [ 8611, 0 ] ), 8435 entity( "↝", [ 8605, 0 ] ), 8436 entity( "⤚", [ 10522, 0 ] ), 8437 entity( "∶", [ 8758, 0 ] ), 8438 entity( "ℚ", [ 8474, 0 ] ), 8439 entity( "⤍", [ 10509, 0 ] ), 8440 entity( "❳", [ 10099, 0 ] ), 8441 entity( "}", [ 125, 0 ] ), 8442 entity( "]", [ 93, 0 ] ), 8443 entity( "⦌", [ 10636, 0 ] ), 8444 entity( "⦎", [ 10638, 0 ] ), 8445 entity( "⦐", [ 10640, 0 ] ), 8446 entity( "ř", [ 345, 0 ] ), 8447 entity( "ŗ", [ 343, 0 ] ), 8448 entity( "⌉", [ 8969, 0 ] ), 8449 entity( "}", [ 125, 0 ] ), 8450 entity( "р", [ 1088, 0 ] ), 8451 entity( "⤷", [ 10551, 0 ] ), 8452 entity( "⥩", [ 10601, 0 ] ), 8453 entity( "”", [ 8221, 0 ] ), 8454 entity( "”", [ 8221, 0 ] ), 8455 entity( "↳", [ 8627, 0 ] ), 8456 entity( "ℜ", [ 8476, 0 ] ), 8457 entity( "ℛ", [ 8475, 0 ] ), 8458 entity( "ℜ", [ 8476, 0 ] ), 8459 entity( "ℝ", [ 8477, 0 ] ), 8460 entity( "▭", [ 9645, 0 ] ), 8461 entity( "®", [ 174, 0 ] ), 8462 entity( "⥽", [ 10621, 0 ] ), 8463 entity( "⌋", [ 8971, 0 ] ), 8464 entity( "𝔯", [ 120111, 0 ] ), 8465 entity( "⇁", [ 8641, 0 ] ), 8466 entity( "⇀", [ 8640, 0 ] ), 8467 entity( "⥬", [ 10604, 0 ] ), 8468 entity( "ρ", [ 961, 0 ] ), 8469 entity( "ϱ", [ 1009, 0 ] ), 8470 entity( "→", [ 8594, 0 ] ), 8471 entity( "↣", [ 8611, 0 ] ), 8472 entity( "⇁", [ 8641, 0 ] ), 8473 entity( "⇀", [ 8640, 0 ] ), 8474 entity( "⇄", [ 8644, 0 ] ), 8475 entity( "⇌", [ 8652, 0 ] ), 8476 entity( "⇉", [ 8649, 0 ] ), 8477 entity( "↝", [ 8605, 0 ] ), 8478 entity( "⋌", [ 8908, 0 ] ), 8479 entity( "˚", [ 730, 0 ] ), 8480 entity( "≓", [ 8787, 0 ] ), 8481 entity( "⇄", [ 8644, 0 ] ), 8482 entity( 
"⇌", [ 8652, 0 ] ), 8483 entity( "‏", [ 8207, 0 ] ), 8484 entity( "⎱", [ 9137, 0 ] ), 8485 entity( "⎱", [ 9137, 0 ] ), 8486 entity( "⫮", [ 10990, 0 ] ), 8487 entity( "⟭", [ 10221, 0 ] ), 8488 entity( "⇾", [ 8702, 0 ] ), 8489 entity( "⟧", [ 10215, 0 ] ), 8490 entity( "⦆", [ 10630, 0 ] ), 8491 entity( "𝕣", [ 120163, 0 ] ), 8492 entity( "⨮", [ 10798, 0 ] ), 8493 entity( "⨵", [ 10805, 0 ] ), 8494 entity( ")", [ 41, 0 ] ), 8495 entity( "⦔", [ 10644, 0 ] ), 8496 entity( "⨒", [ 10770, 0 ] ), 8497 entity( "⇉", [ 8649, 0 ] ), 8498 entity( "›", [ 8250, 0 ] ), 8499 entity( "𝓇", [ 120007, 0 ] ), 8500 entity( "↱", [ 8625, 0 ] ), 8501 entity( "]", [ 93, 0 ] ), 8502 entity( "’", [ 8217, 0 ] ), 8503 entity( "’", [ 8217, 0 ] ), 8504 entity( "⋌", [ 8908, 0 ] ), 8505 entity( "⋊", [ 8906, 0 ] ), 8506 entity( "▹", [ 9657, 0 ] ), 8507 entity( "⊵", [ 8885, 0 ] ), 8508 entity( "▸", [ 9656, 0 ] ), 8509 entity( "⧎", [ 10702, 0 ] ), 8510 entity( "⥨", [ 10600, 0 ] ), 8511 entity( "℞", [ 8478, 0 ] ), 8512 entity( "ś", [ 347, 0 ] ), 8513 entity( "‚", [ 8218, 0 ] ), 8514 entity( "≻", [ 8827, 0 ] ), 8515 entity( "⪴", [ 10932, 0 ] ), 8516 entity( "⪸", [ 10936, 0 ] ), 8517 entity( "š", [ 353, 0 ] ), 8518 entity( "≽", [ 8829, 0 ] ), 8519 entity( "⪰", [ 10928, 0 ] ), 8520 entity( "ş", [ 351, 0 ] ), 8521 entity( "ŝ", [ 349, 0 ] ), 8522 entity( "⪶", [ 10934, 0 ] ), 8523 entity( "⪺", [ 10938, 0 ] ), 8524 entity( "⋩", [ 8937, 0 ] ), 8525 entity( "⨓", [ 10771, 0 ] ), 8526 entity( "≿", [ 8831, 0 ] ), 8527 entity( "с", [ 1089, 0 ] ), 8528 entity( "⋅", [ 8901, 0 ] ), 8529 entity( "⊡", [ 8865, 0 ] ), 8530 entity( "⩦", [ 10854, 0 ] ), 8531 entity( "⇘", [ 8664, 0 ] ), 8532 entity( "⤥", [ 10533, 0 ] ), 8533 entity( "↘", [ 8600, 0 ] ), 8534 entity( "↘", [ 8600, 0 ] ), 8535 entity( "§", [ 167, 0 ] ), 8536 entity( ";", [ 59, 0 ] ), 8537 entity( "⤩", [ 10537, 0 ] ), 8538 entity( "∖", [ 8726, 0 ] ), 8539 entity( "∖", [ 8726, 0 ] ), 8540 entity( "✶", [ 10038, 0 ] ), 8541 entity( "𝔰", [ 120112, 0 ] ), 8542 entity( "⌢", 
[ 8994, 0 ] ), 8543 entity( "♯", [ 9839, 0 ] ), 8544 entity( "щ", [ 1097, 0 ] ), 8545 entity( "ш", [ 1096, 0 ] ), 8546 entity( "∣", [ 8739, 0 ] ), 8547 entity( "∥", [ 8741, 0 ] ), 8548 entity( "­", [ 173, 0 ] ), 8549 entity( "σ", [ 963, 0 ] ), 8550 entity( "ς", [ 962, 0 ] ), 8551 entity( "ς", [ 962, 0 ] ), 8552 entity( "∼", [ 8764, 0 ] ), 8553 entity( "⩪", [ 10858, 0 ] ), 8554 entity( "≃", [ 8771, 0 ] ), 8555 entity( "≃", [ 8771, 0 ] ), 8556 entity( "⪞", [ 10910, 0 ] ), 8557 entity( "⪠", [ 10912, 0 ] ), 8558 entity( "⪝", [ 10909, 0 ] ), 8559 entity( "⪟", [ 10911, 0 ] ), 8560 entity( "≆", [ 8774, 0 ] ), 8561 entity( "⨤", [ 10788, 0 ] ), 8562 entity( "⥲", [ 10610, 0 ] ), 8563 entity( "←", [ 8592, 0 ] ), 8564 entity( "∖", [ 8726, 0 ] ), 8565 entity( "⨳", [ 10803, 0 ] ), 8566 entity( "⧤", [ 10724, 0 ] ), 8567 entity( "∣", [ 8739, 0 ] ), 8568 entity( "⌣", [ 8995, 0 ] ), 8569 entity( "⪪", [ 10922, 0 ] ), 8570 entity( "⪬", [ 10924, 0 ] ), 8571 entity( "⪬︀", [ 10924, 65024 ] ), 8572 entity( "ь", [ 1100, 0 ] ), 8573 entity( "/", [ 47, 0 ] ), 8574 entity( "⧄", [ 10692, 0 ] ), 8575 entity( "⌿", [ 9023, 0 ] ), 8576 entity( "𝕤", [ 120164, 0 ] ), 8577 entity( "♠", [ 9824, 0 ] ), 8578 entity( "♠", [ 9824, 0 ] ), 8579 entity( "∥", [ 8741, 0 ] ), 8580 entity( "⊓", [ 8851, 0 ] ), 8581 entity( "⊓︀", [ 8851, 65024 ] ), 8582 entity( "⊔", [ 8852, 0 ] ), 8583 entity( "⊔︀", [ 8852, 65024 ] ), 8584 entity( "⊏", [ 8847, 0 ] ), 8585 entity( "⊑", [ 8849, 0 ] ), 8586 entity( "⊏", [ 8847, 0 ] ), 8587 entity( "⊑", [ 8849, 0 ] ), 8588 entity( "⊐", [ 8848, 0 ] ), 8589 entity( "⊒", [ 8850, 0 ] ), 8590 entity( "⊐", [ 8848, 0 ] ), 8591 entity( "⊒", [ 8850, 0 ] ), 8592 entity( "□", [ 9633, 0 ] ), 8593 entity( "□", [ 9633, 0 ] ), 8594 entity( "▪", [ 9642, 0 ] ), 8595 entity( "▪", [ 9642, 0 ] ), 8596 entity( "→", [ 8594, 0 ] ), 8597 entity( "𝓈", [ 120008, 0 ] ), 8598 entity( "∖", [ 8726, 0 ] ), 8599 entity( "⌣", [ 8995, 0 ] ), 8600 entity( "⋆", [ 8902, 0 ] ), 8601 entity( "☆", [ 9734, 0 ] ), 8602 
entity( "★", [ 9733, 0 ] ), 8603 entity( "ϵ", [ 1013, 0 ] ), 8604 entity( "ϕ", [ 981, 0 ] ), 8605 entity( "¯", [ 175, 0 ] ), 8606 entity( "⊂", [ 8834, 0 ] ), 8607 entity( "⫅", [ 10949, 0 ] ), 8608 entity( "⪽", [ 10941, 0 ] ), 8609 entity( "⊆", [ 8838, 0 ] ), 8610 entity( "⫃", [ 10947, 0 ] ), 8611 entity( "⫁", [ 10945, 0 ] ), 8612 entity( "⫋", [ 10955, 0 ] ), 8613 entity( "⊊", [ 8842, 0 ] ), 8614 entity( "⪿", [ 10943, 0 ] ), 8615 entity( "⥹", [ 10617, 0 ] ), 8616 entity( "⊂", [ 8834, 0 ] ), 8617 entity( "⊆", [ 8838, 0 ] ), 8618 entity( "⫅", [ 10949, 0 ] ), 8619 entity( "⊊", [ 8842, 0 ] ), 8620 entity( "⫋", [ 10955, 0 ] ), 8621 entity( "⫇", [ 10951, 0 ] ), 8622 entity( "⫕", [ 10965, 0 ] ), 8623 entity( "⫓", [ 10963, 0 ] ), 8624 entity( "≻", [ 8827, 0 ] ), 8625 entity( "⪸", [ 10936, 0 ] ), 8626 entity( "≽", [ 8829, 0 ] ), 8627 entity( "⪰", [ 10928, 0 ] ), 8628 entity( "⪺", [ 10938, 0 ] ), 8629 entity( "⪶", [ 10934, 0 ] ), 8630 entity( "⋩", [ 8937, 0 ] ), 8631 entity( "≿", [ 8831, 0 ] ), 8632 entity( "∑", [ 8721, 0 ] ), 8633 entity( "♪", [ 9834, 0 ] ), 8634 entity( "¹", [185, 0 ] ), 8635 entity( "¹", [ 185, 0 ] ), 8636 entity( "²", [178, 0 ] ), 8637 entity( "²", [ 178, 0 ] ), 8638 entity( "³", [179, 0 ] ), 8639 entity( "³", [ 179, 0 ] ), 8640 entity( "⊃", [ 8835, 0 ] ), 8641 entity( "⫆", [ 10950, 0 ] ), 8642 entity( "⪾", [ 10942, 0 ] ), 8643 entity( "⫘", [ 10968, 0 ] ), 8644 entity( "⊇", [ 8839, 0 ] ), 8645 entity( "⫄", [ 10948, 0 ] ), 8646 entity( "⟉", [ 10185, 0 ] ), 8647 entity( "⫗", [ 10967, 0 ] ), 8648 entity( "⥻", [ 10619, 0 ] ), 8649 entity( "⫂", [ 10946, 0 ] ), 8650 entity( "⫌", [ 10956, 0 ] ), 8651 entity( "⊋", [ 8843, 0 ] ), 8652 entity( "⫀", [ 10944, 0 ] ), 8653 entity( "⊃", [ 8835, 0 ] ), 8654 entity( "⊇", [ 8839, 0 ] ), 8655 entity( "⫆", [ 10950, 0 ] ), 8656 entity( "⊋", [ 8843, 0 ] ), 8657 entity( "⫌", [ 10956, 0 ] ), 8658 entity( "⫈", [ 10952, 0 ] ), 8659 entity( "⫔", [ 10964, 0 ] ), 8660 entity( "⫖", [ 10966, 0 ] ), 8661 entity( "⇙", [ 8665, 0 ] ), 8662 
entity( "⤦", [ 10534, 0 ] ), 8663 entity( "↙", [ 8601, 0 ] ), 8664 entity( "↙", [ 8601, 0 ] ), 8665 entity( "⤪", [ 10538, 0 ] ), 8666 entity( "ß", [ 223, 0 ] ), 8667 entity( "⌖", [ 8982, 0 ] ), 8668 entity( "τ", [ 964, 0 ] ), 8669 entity( "⎴", [ 9140, 0 ] ), 8670 entity( "ť", [ 357, 0 ] ), 8671 entity( "ţ", [ 355, 0 ] ), 8672 entity( "т", [ 1090, 0 ] ), 8673 entity( "⃛", [ 8411, 0 ] ), 8674 entity( "⌕", [ 8981, 0 ] ), 8675 entity( "𝔱", [ 120113, 0 ] ), 8676 entity( "∴", [ 8756, 0 ] ), 8677 entity( "∴", [ 8756, 0 ] ), 8678 entity( "θ", [ 952, 0 ] ), 8679 entity( "ϑ", [ 977, 0 ] ), 8680 entity( "ϑ", [ 977, 0 ] ), 8681 entity( "≈", [ 8776, 0 ] ), 8682 entity( "∼", [ 8764, 0 ] ), 8683 entity( " ", [ 8201, 0 ] ), 8684 entity( "≈", [ 8776, 0 ] ), 8685 entity( "∼", [ 8764, 0 ] ), 8686 entity( "þ", [ 254, 0 ] ), 8687 entity( "˜", [ 732, 0 ] ), 8688 entity( "×", [ 215, 0 ] ), 8689 entity( "⊠", [ 8864, 0 ] ), 8690 entity( "⨱", [ 10801, 0 ] ), 8691 entity( "⨰", [ 10800, 0 ] ), 8692 entity( "∭", [ 8749, 0 ] ), 8693 entity( "⤨", [ 10536, 0 ] ), 8694 entity( "⊤", [ 8868, 0 ] ), 8695 entity( "⌶", [ 9014, 0 ] ), 8696 entity( "⫱", [ 10993, 0 ] ), 8697 entity( "𝕥", [ 120165, 0 ] ), 8698 entity( "⫚", [ 10970, 0 ] ), 8699 entity( "⤩", [ 10537, 0 ] ), 8700 entity( "‴", [ 8244, 0 ] ), 8701 entity( "™", [ 8482, 0 ] ), 8702 entity( "▵", [ 9653, 0 ] ), 8703 entity( "▿", [ 9663, 0 ] ), 8704 entity( "◃", [ 9667, 0 ] ), 8705 entity( "⊴", [ 8884, 0 ] ), 8706 entity( "≜", [ 8796, 0 ] ), 8707 entity( "▹", [ 9657, 0 ] ), 8708 entity( "⊵", [ 8885, 0 ] ), 8709 entity( "◬", [ 9708, 0 ] ), 8710 entity( "≜", [ 8796, 0 ] ), 8711 entity( "⨺", [ 10810, 0 ] ), 8712 entity( "⨹", [ 10809, 0 ] ), 8713 entity( "⧍", [ 10701, 0 ] ), 8714 entity( "⨻", [ 10811, 0 ] ), 8715 entity( "⏢", [ 9186, 0 ] ), 8716 entity( "𝓉", [ 120009, 0 ] ), 8717 entity( "ц", [ 1094, 0 ] ), 8718 entity( "ћ", [ 1115, 0 ] ), 8719 entity( "ŧ", [ 359, 0 ] ), 8720 entity( "≬", [ 8812, 0 ] ), 8721 entity( "↞", [ 8606, 0 ] ), 8722 entity( "↠", 
[ 8608, 0 ] ), 8723 entity( "⇑", [ 8657, 0 ] ), 8724 entity( "⥣", [ 10595, 0 ] ), 8725 entity( "ú", [ 250, 0 ] ), 8726 entity( "↑", [ 8593, 0 ] ), 8727 entity( "ў", [ 1118, 0 ] ), 8728 entity( "ŭ", [ 365, 0 ] ), 8729 entity( "û", [ 251, 0 ] ), 8730 entity( "у", [ 1091, 0 ] ), 8731 entity( "⇅", [ 8645, 0 ] ), 8732 entity( "ű", [ 369, 0 ] ), 8733 entity( "⥮", [ 10606, 0 ] ), 8734 entity( "⥾", [ 10622, 0 ] ), 8735 entity( "𝔲", [ 120114, 0 ] ), 8736 entity( "ù", [ 249, 0 ] ), 8737 entity( "↿", [ 8639, 0 ] ), 8738 entity( "↾", [ 8638, 0 ] ), 8739 entity( "▀", [ 9600, 0 ] ), 8740 entity( "⌜", [ 8988, 0 ] ), 8741 entity( "⌜", [ 8988, 0 ] ), 8742 entity( "⌏", [ 8975, 0 ] ), 8743 entity( "◸", [ 9720, 0 ] ), 8744 entity( "ū", [ 363, 0 ] ), 8745 entity( "¨", [ 168, 0 ] ), 8746 entity( "ų", [ 371, 0 ] ), 8747 entity( "𝕦", [ 120166, 0 ] ), 8748 entity( "↑", [ 8593, 0 ] ), 8749 entity( "↕", [ 8597, 0 ] ), 8750 entity( "↿", [ 8639, 0 ] ), 8751 entity( "↾", [ 8638, 0 ] ), 8752 entity( "⊎", [ 8846, 0 ] ), 8753 entity( "υ", [ 965, 0 ] ), 8754 entity( "ϒ", [ 978, 0 ] ), 8755 entity( "υ", [ 965, 0 ] ), 8756 entity( "⇈", [ 8648, 0 ] ), 8757 entity( "⌝", [ 8989, 0 ] ), 8758 entity( "⌝", [ 8989, 0 ] ), 8759 entity( "⌎", [ 8974, 0 ] ), 8760 entity( "ů", [ 367, 0 ] ), 8761 entity( "◹", [ 9721, 0 ] ), 8762 entity( "𝓊", [ 120010, 0 ] ), 8763 entity( "⋰", [ 8944, 0 ] ), 8764 entity( "ũ", [ 361, 0 ] ), 8765 entity( "▵", [ 9653, 0 ] ), 8766 entity( "▴", [ 9652, 0 ] ), 8767 entity( "⇈", [ 8648, 0 ] ), 8768 entity( "ü", [ 252, 0 ] ), 8769 entity( "⦧", [ 10663, 0 ] ), 8770 entity( "⇕", [ 8661, 0 ] ), 8771 entity( "⫨", [ 10984, 0 ] ), 8772 entity( "⫩", [ 10985, 0 ] ), 8773 entity( "⊨", [ 8872, 0 ] ), 8774 entity( "⦜", [ 10652, 0 ] ), 8775 entity( "ϵ", [ 1013, 0 ] ), 8776 entity( "ϰ", [ 1008, 0 ] ), 8777 entity( "∅", [ 8709, 0 ] ), 8778 entity( "ϕ", [ 981, 0 ] ), 8779 entity( "ϖ", [ 982, 0 ] ), 8780 entity( "∝", [ 8733, 0 ] ), 8781 entity( "↕", [ 8597, 0 ] ), 8782 entity( "ϱ", [ 1009, 0 ] ), 8783 
entity( "ς", [ 962, 0 ] ), 8784 entity( "⊊︀", [ 8842, 65024 ] ), 8785 entity( "⫋︀", [ 10955, 65024 ] ), 8786 entity( "⊋︀", [ 8843, 65024 ] ), 8787 entity( "⫌︀", [ 10956, 65024 ] ), 8788 entity( "ϑ", [ 977, 0 ] ), 8789 entity( "⊲", [ 8882, 0 ] ), 8790 entity( "⊳", [ 8883, 0 ] ), 8791 entity( "в", [ 1074, 0 ] ), 8792 entity( "⊢", [ 8866, 0 ] ), 8793 entity( "∨", [ 8744, 0 ] ), 8794 entity( "⊻", [ 8891, 0 ] ), 8795 entity( "≚", [ 8794, 0 ] ), 8796 entity( "⋮", [ 8942, 0 ] ), 8797 entity( "|", [ 124, 0 ] ), 8798 entity( "|", [ 124, 0 ] ), 8799 entity( "𝔳", [ 120115, 0 ] ), 8800 entity( "⊲", [ 8882, 0 ] ), 8801 entity( "⊂⃒", [ 8834, 8402 ] ), 8802 entity( "⊃⃒", [ 8835, 8402 ] ), 8803 entity( "𝕧", [ 120167, 0 ] ), 8804 entity( "∝", [ 8733, 0 ] ), 8805 entity( "⊳", [ 8883, 0 ] ), 8806 entity( "𝓋", [ 120011, 0 ] ), 8807 entity( "⫋︀", [ 10955, 65024 ] ), 8808 entity( "⊊︀", [ 8842, 65024 ] ), 8809 entity( "⫌︀", [ 10956, 65024 ] ), 8810 entity( "⊋︀", [ 8843, 65024 ] ), 8811 entity( "⦚", [ 10650, 0 ] ), 8812 entity( "ŵ", [ 373, 0 ] ), 8813 entity( "⩟", [ 10847, 0 ] ), 8814 entity( "∧", [ 8743, 0 ] ), 8815 entity( "≙", [ 8793, 0 ] ), 8816 entity( "℘", [ 8472, 0 ] ), 8817 entity( "𝔴", [ 120116, 0 ] ), 8818 entity( "𝕨", [ 120168, 0 ] ), 8819 entity( "℘", [ 8472, 0 ] ), 8820 entity( "≀", [ 8768, 0 ] ), 8821 entity( "≀", [ 8768, 0 ] ), 8822 entity( "𝓌", [ 120012, 0 ] ), 8823 entity( "⋂", [ 8898, 0 ] ), 8824 entity( "◯", [ 9711, 0 ] ), 8825 entity( "⋃", [ 8899, 0 ] ), 8826 entity( "▽", [ 9661, 0 ] ), 8827 entity( "𝔵", [ 120117, 0 ] ), 8828 entity( "⟺", [ 10234, 0 ] ), 8829 entity( "⟷", [ 10231, 0 ] ), 8830 entity( "ξ", [ 958, 0 ] ), 8831 entity( "⟸", [ 10232, 0 ] ), 8832 entity( "⟵", [ 10229, 0 ] ), 8833 entity( "⟼", [ 10236, 0 ] ), 8834 entity( "⋻", [ 8955, 0 ] ), 8835 entity( "⨀", [ 10752, 0 ] ), 8836 entity( "𝕩", [ 120169, 0 ] ), 8837 entity( "⨁", [ 10753, 0 ] ), 8838 entity( "⨂", [ 10754, 0 ] ), 8839 entity( "⟹", [ 10233, 0 ] ), 8840 entity( "⟶", [ 10230, 0 ] ), 8841 entity( 
"𝓍", [ 120013, 0 ] ),
    entity( "⨆", [ 10758, 0 ] ),
    entity( "⨄", [ 10756, 0 ] ),
    entity( "△", [ 9651, 0 ] ),
    entity( "⋁", [ 8897, 0 ] ),
    entity( "⋀", [ 8896, 0 ] ),
    entity( "ý", [ 253, 0 ] ),
    entity( "я", [ 1103, 0 ] ),
    entity( "ŷ", [ 375, 0 ] ),
    entity( "ы", [ 1099, 0 ] ),
    entity( "¥", [ 165, 0 ] ),
    entity( "𝔶", [ 120118, 0 ] ),
    entity( "ї", [ 1111, 0 ] ),
    entity( "𝕪", [ 120170, 0 ] ),
    entity( "𝓎", [ 120014, 0 ] ),
    entity( "ю", [ 1102, 0 ] ),
    entity( "ÿ", [ 255, 0 ] ),
    entity( "ź", [ 378, 0 ] ),
    entity( "ž", [ 382, 0 ] ),
    entity( "з", [ 1079, 0 ] ),
    entity( "ż", [ 380, 0 ] ),
    entity( "ℨ", [ 8488, 0 ] ),
    entity( "ζ", [ 950, 0 ] ),
    entity( "𝔷", [ 120119, 0 ] ),
    entity( "ж", [ 1078, 0 ] ),
    entity( "⇝", [ 8669, 0 ] ),
    entity( "𝕫", [ 120171, 0 ] ),
    entity( "𝓏", [ 120015, 0 ] ),
    entity( "‍", [ 8205, 0 ] ),
    entity( "‌", [ 8204, 0 ] ),
];


/* Search key handed to bsearch(): a character run that is described by a
 * pointer plus an explicit length. */
struct entity_key
{
    const(char)* name;
    size_t name_size;
}

/* Comparison callback for bsearch() over entity_table.
 *
 * p_key points to an entity_key, p_entity to a table element. Only the first
 * key.name_size bytes of both names are compared (strncmp() semantics), and
 * the strncmp() result is returned unchanged.
 */
extern(C) int entity_cmp(const(void)* p_key, const(void)* p_entity)
{
    entity_key* wanted = cast(entity_key*) p_key;
    entity* candidate = cast(entity*) p_entity;

    return strncmp(wanted.name, candidate.name, wanted.name_size);
}

/* Look up the entity whose name matches the name_size bytes starting at name.
 *
 * Returns a pointer to the matching entity_table element, or null when
 * bsearch() finds none.
 *
 * NOTE(review): bsearch() needs entity_table to be ordered consistently with
 * entity_cmp() -- presumably sorted by name; confirm against the table.
 */
const(entity)* entity_lookup(const(char)* name, size_t name_size)
{
    entity_key needle;
    needle.name = name;
    needle.name_size = name_size;

    return cast(const(entity)*) bsearch(&needle, cast(const(void)*)entity_table.ptr,
                                        entity_table.length, entity.sizeof, &entity_cmp);
}

//
// HTML RENDERING
//

/* If set, debug output from md_parse() is sent to stderr.
*/
enum MD_RENDER_FLAG_DEBUG = 0x0001;

/* If set, render_entity() passes entities to the output verbatim instead of
 * translating them to UTF-8. */
enum MD_RENDER_FLAG_VERBATIM_ENTITIES = 0x0002;


/* Renderer state threaded through all parser callbacks as their userdata. */
struct MD_RENDER_HTML
{
    /* Sink that receives every chunk of generated output. */
    void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output;
    /* Passed back unchanged as the last argument of process_output(). */
    void* userdata;
    /* Bitmask of MD_RENDER_FLAG_xxxx. */
    uint flags;
    /* Incremented when an image span is opened, decremented when it is closed;
     * while positive, span tags are suppressed (ALT text is being rendered). */
    int image_nesting_level;
    /* escape_map[b] != 0 means byte b must be escaped in HTML text. */
    char[256] escape_map;
}


/*****************************************
 ***  HTML rendering helper functions  ***
 *****************************************/

/*
#define ISDIGIT(ch)
#define ISLOWER(ch)
#define ISUPPER(ch)
*/

/* True for ASCII letters and ASCII digits only. */
bool ISALNUM_HTML(CHAR ch)
{
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9');
}

/* Forward size bytes starting at text to the output callback, unmodified. */
void render_text(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size)
{
    r.process_output(text, size, r.userdata);
}

/* Output a zero-terminated string verbatim (length is taken with strlen()). */
void RENDER_LITERAL(MD_RENDER_HTML* r, const(MD_CHAR)* literal)
{
    render_text(r, literal, cast(uint) strlen(literal));
}

/* Some characters need to be escaped in normal HTML text. */
bool HTML_NEED_ESCAPE(MD_RENDER_HTML* r, CHAR ch)
{
    return (r.escape_map[cast(ubyte)(ch)] != 0);
}

/* Output data[0 .. size] as HTML text.
 *
 * Runs of bytes that need no escaping are forwarded in one render_text()
 * call; each byte flagged in r.escape_map is replaced by its character
 * reference ('&', '<', '>' and '"' are handled). A flagged byte without
 * a known replacement is dropped.
 */
void render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size)
{
    MD_OFFSET beg = 0;
    MD_OFFSET off = 0;

    while(1) {
        /* Optimization: Use some loop unrolling. */
        while(off + 3 < size  &&  !HTML_NEED_ESCAPE(r, data[off+0])  &&  !HTML_NEED_ESCAPE(r, data[off+1])
                              &&  !HTML_NEED_ESCAPE(r, data[off+2])  &&  !HTML_NEED_ESCAPE(r, data[off+3]))
            off += 4;
        while(off < size  &&  !HTML_NEED_ESCAPE(r, data[off]))
            off++;

        /* Flush the unescaped run collected so far. */
        if(off > beg)
            render_text(r, data + beg, off - beg);

        if(off < size) {
            /* The literals must be the escaped forms; emitting the raw
             * character here would defeat the whole function. */
            switch(data[off]) {
                case '&':   RENDER_LITERAL(r, "&amp;"); break;
                case '<':   RENDER_LITERAL(r, "&lt;"); break;
                case '>':   RENDER_LITERAL(r, "&gt;"); break;
                case '"':   RENDER_LITERAL(r, "&quot;"); break;
                default: break;
            }
            off++;
        } else {
            break;
        }
        beg = off;
    }
}


/* True if ch has to be escaped inside a URL attribute: anything that is
 * neither ASCII alphanumeric nor one of the listed punctuation characters. */
bool URL_NEED_ESCAPE(CHAR ch)
{
    return (!ISALNUM_HTML(ch) && strchr("-_.+!*'(),%#@?=;:/,+$", ch) == null);
}

/* Output data[0 .. size] as the value of a URL attribute.
 *
 * Bytes accepted by URL_NEED_ESCAPE() pass through unchanged. '&' becomes
 * "&amp;" so the attribute stays well-formed HTML; every other byte that
 * needs escaping is percent-encoded as "%XX" with upper-case hex digits.
 */
void render_url_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size)
{
    static immutable(MD_CHAR)[] hex_chars = "0123456789ABCDEF";
    MD_OFFSET beg = 0;
    MD_OFFSET off = 0;

    while(1) {
        while(off < size  &&  !URL_NEED_ESCAPE(data[off]))
            off++;
        if(off > beg)
            render_text(r, data + beg, off - beg);

        if(off < size) {
            char[3] hex;

            switch(data[off]) {
                case '&':   RENDER_LITERAL(r, "&amp;"); break;
                /* Currently not reached, because '\'' is in the
                 * URL_NEED_ESCAPE() pass-through set; kept for safety. */
                case '\'':  RENDER_LITERAL(r, "&#x27;"); break;
                default:
                    hex[0] = '%';
                    hex[1] = hex_chars[(cast(uint)data[off] >> 4) & 0xf];
                    hex[2] = hex_chars[(cast(uint)data[off] >> 0) & 0xf];
                    render_text(r, hex.ptr, 3);
                    break;
            }
            off++;
        } else {
            break;
        }

        beg = off;
    }
}

/* Numeric value of a hexadecimal digit.
 *
 * Performs no validation: anything that is not '0'-'9' or 'A'-'Z' is treated
 * as a lower-case letter. Callers are expected to pass hex digits only.
 */
uint hex_val(char ch)
{
    if('0' <= ch && ch <= '9')
        return ch - '0';
    if('A' <= ch && ch <= 'Z')
        return ch - 'A' + 10;
    else
        return ch - 'a' + 10;
}

/* Signature shared by render_text(), render_html_escaped() and
 * render_url_escaped(), so helpers can be told how to emit their output. */
alias appendFunc = nothrow @nogc void function(MD_RENDER_HTML*, const(MD_CHAR)*, MD_SIZE);

/* Encode codepoint as UTF-8 and emit it through fn_append.
 *
 * Codepoint 0 and codepoints above 0x10ffff are replaced by U+FFFD
 * (bytes EF BF BD).
 */
void render_utf8_codepoint(MD_RENDER_HTML* r, uint codepoint,
                           appendFunc fn_append)
{
    static immutable(MD_CHAR)[] utf8_replacement_char = [ 0xef, 0xbf, 0xbd ];

    char[4] utf8;
    size_t n;

    if(codepoint <= 0x7f) {
        n = 1;
        utf8[0] = cast(ubyte) codepoint;
    } else if(codepoint <= 0x7ff) {
        n = 2;
        utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
        utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
    } else if(codepoint <= 0xffff) {
        n = 3;
        utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
        utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
        utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
    } else {
        n = 4;
        utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
        utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
        utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
        utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
    }

    if(0 < codepoint  &&  codepoint <= 0x10ffff)
        fn_append(r, utf8.ptr, cast(uint)n);
    else
        fn_append(r, utf8_replacement_char.ptr, 3);
}

/* Translate entity to its UTF-8 equivalent, or output the verbatim one
 * if such entity is unknown (or if the translation is disabled).
 *
 * text/size cover the whole entity; the numeric branches skip the leading
 * "&#" / "&#x" and stop before the last character.
 */
void render_entity(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size,
                   appendFunc fn_append)
{
    if(r.flags & MD_RENDER_FLAG_VERBATIM_ENTITIES) {
        fn_append(r, text, size);
        return;
    }

    /* We assume UTF-8 output is what is desired. */
    if(size > 3 && text[1] == '#') {
        uint codepoint = 0;

        if(text[2] == 'x' || text[2] == 'X') {
            /* Hexadecimal entity (e.g. "&#x1234abcd;")). */
            MD_SIZE i;
            for(i = 3; i < size-1; i++)
                codepoint = 16 * codepoint + hex_val(text[i]);
        } else {
            /* Decimal entity (e.g. "&#1234;") */
            MD_SIZE i;
            for(i = 2; i < size-1; i++)
                codepoint = 10 * codepoint + (text[i] - '0');
        }

        render_utf8_codepoint(r, codepoint, fn_append);
        return;
    } else {
        /* Named entity (e.g. "&nbsp;"). */
        const(entity)* ent;

        ent = entity_lookup(text, size);
        if(ent != null) {
            render_utf8_codepoint(r, ent.codepoints[0], fn_append);
            /* A second codepoint of zero means the entity has only one. */
            if(ent.codepoints[1])
                render_utf8_codepoint(r, ent.codepoints[1], fn_append);
            return;
        }
    }

    /* Unknown entity: fall back to verbatim output. */
    fn_append(r, text, size);
}

/* Render an attribute value substring by substring.
 *
 * Substring i spans attr.substr_offsets[i] .. attr.substr_offsets[i+1] and
 * has type attr.substr_types[i]; iteration ends at the first offset that is
 * not below attr.size. Entities are translated, everything else goes through
 * fn_append.
 */
void render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr,
                      appendFunc fn_append)
{
    int i;

    for(i = 0; attr.substr_offsets[i] < attr.size; i++) {
        MD_TEXTTYPE type = attr.substr_types[i];
        MD_OFFSET off = attr.substr_offsets[i];
        MD_SIZE size = attr.substr_offsets[i+1] - off;
        const MD_CHAR* text = attr.text + off;

        switch(type) {
            case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, &render_text); break;
            case MD_TEXT_ENTITY:    render_entity(r, text, size, fn_append); break;
            default:                fn_append(r, text, size); break;
        }
    }
}


/* Open an ordered list; the start attribute is emitted only when the list
 * does not start at 1. */
void render_open_ol_block(MD_RENDER_HTML* r, const(MD_BLOCK_OL_DETAIL)* det)
{
    char[64] buf;

    if(det.start == 1) {
        RENDER_LITERAL(r, "<ol>\n");
        return;
    }

    snprintf(buf.ptr, buf.length, "<ol start=\"%u\">\n", det.start);
    RENDER_LITERAL(r, buf.ptr);
}

/* Open a list item; task list items additionally get a disabled checkbox,
 * checked when the task mark is 'x' or 'X'. */
void render_open_li_block(MD_RENDER_HTML* r, const(MD_BLOCK_LI_DETAIL)* det)
{
    if(det.is_task) {
        RENDER_LITERAL(r, "<li class=\"task-list-item\">" ~
                          "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
        if(det.task_mark == 'x' || det.task_mark == 'X')
            RENDER_LITERAL(r, " checked");
        RENDER_LITERAL(r, ">");
    } else {
        RENDER_LITERAL(r, "<li>");
    }
}

/* Open a code block as <pre><code ...>. */
void render_open_code_block(MD_RENDER_HTML* r, const(MD_BLOCK_CODE_DETAIL)* det)
{
    RENDER_LITERAL(r, "<pre><code");

    /* If known, output the HTML 5 attribute class="language-LANGNAME". */
    if(det.lang.text != null) {
        RENDER_LITERAL(r, " class=\"language-");
        render_attribute(r, &det.lang, &render_html_escaped);
        RENDER_LITERAL(r, "\"");
    }

    RENDER_LITERAL(r, ">");
}

/* Open a table cell. cell_type is the tag name ("th" or "td" at the call
 * sites); an align attribute is added for left/center/right alignment. */
void render_open_td_block(MD_RENDER_HTML* r, const(MD_CHAR)* cell_type, const(MD_BLOCK_TD_DETAIL)* det)
{
    RENDER_LITERAL(r, "<");
    RENDER_LITERAL(r, cell_type);

    switch(det.align_)
    {
        case MD_ALIGN_LEFT:     RENDER_LITERAL(r, " align=\"left\">"); break;
        case MD_ALIGN_CENTER:   RENDER_LITERAL(r, " align=\"center\">"); break;
        case MD_ALIGN_RIGHT:    RENDER_LITERAL(r, " align=\"right\">"); break;
        default:                RENDER_LITERAL(r, ">"); break;
    }
}

/* Open a link: href is URL-escaped, the optional title is HTML-escaped. */
void render_open_a_span(MD_RENDER_HTML* r, const(MD_SPAN_A_DETAIL)* det)
{
    RENDER_LITERAL(r, "<a href=\"");
    render_attribute(r, &det.href, &render_url_escaped);

    if(det.title.text != null) {
        RENDER_LITERAL(r, "\" title=\"");
        render_attribute(r, &det.title, &render_html_escaped);
    }

    RENDER_LITERAL(r, "\">");
}

/* Start an <img> tag and leave the alt attribute open; the text that follows
 * becomes the ALT value until render_close_img_span() closes it. */
void render_open_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det)
{
    RENDER_LITERAL(r, "<img src=\"");
    render_attribute(r, &det.src, &render_url_escaped);

    RENDER_LITERAL(r, "\" alt=\"");

    r.image_nesting_level++;
}

/* Close the alt attribute, add the optional title and finish the <img> tag. */
void render_close_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det)
{
    if(det.title.text != null) {
        RENDER_LITERAL(r, "\" title=\"");
        render_attribute(r, &det.title, &render_html_escaped);
    }
    RENDER_LITERAL(r, "\" />");
    r.image_nesting_level--;
}


/**************************************
 ***  HTML renderer implementation  ***
 **************************************/

/* Parser callback: emit the opening tag for a block. userdata is the
 * MD_RENDER_HTML state. Always returns 0. */
int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
    static immutable(MD_CHAR)*[6] head = [ "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" ];
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    switch(type)
    {
        case MD_BLOCK_DOC:      /* noop */ break;
        case MD_BLOCK_QUOTE:    RENDER_LITERAL(r, "<blockquote>\n"); break;
        case MD_BLOCK_UL:       RENDER_LITERAL(r, "<ul>\n"); break;
        case MD_BLOCK_OL:       render_open_ol_block(r, cast(const(MD_BLOCK_OL_DETAIL)*)detail); break;
        case MD_BLOCK_LI:       render_open_li_block(r, cast(const(MD_BLOCK_LI_DETAIL)*)detail); break;
        case MD_BLOCK_HR:       RENDER_LITERAL(r, "<hr />\n"); break;
        case MD_BLOCK_H:        RENDER_LITERAL(r, head[(cast(MD_BLOCK_H_DETAIL*)detail).level - 1]); break;
        case MD_BLOCK_CODE:     render_open_code_block(r, cast(const(MD_BLOCK_CODE_DETAIL)*) detail); break;
        case MD_BLOCK_HTML:     /* noop */ break;
        case MD_BLOCK_P:        RENDER_LITERAL(r, "<p>"); break;
        case MD_BLOCK_TABLE:    RENDER_LITERAL(r, "<table>\n"); break;
        case MD_BLOCK_THEAD:    RENDER_LITERAL(r, "<thead>\n"); break;
        case MD_BLOCK_TBODY:    RENDER_LITERAL(r, "<tbody>\n"); break;
        case MD_BLOCK_TR:       RENDER_LITERAL(r, "<tr>\n"); break;
        case MD_BLOCK_TH:       render_open_td_block(r, "th", cast(MD_BLOCK_TD_DETAIL*)detail); break;
        case MD_BLOCK_TD:       render_open_td_block(r, "td", cast(MD_BLOCK_TD_DETAIL*)detail); break;
        default: assert(false);
    }

    return 0;
}

/* Parser callback: emit the closing tag for a block. Always returns 0. */
int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
    static immutable(MD_CHAR)*[6] head = [ "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" ];
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    switch(type) {
        case MD_BLOCK_DOC:      /*noop*/ break;
        case MD_BLOCK_QUOTE:    RENDER_LITERAL(r, "</blockquote>\n"); break;
        case MD_BLOCK_UL:       RENDER_LITERAL(r, "</ul>\n"); break;
        case MD_BLOCK_OL:       RENDER_LITERAL(r, "</ol>\n"); break;
        case MD_BLOCK_LI:       RENDER_LITERAL(r, "</li>\n"); break;
        case MD_BLOCK_HR:       /*noop*/ break;
        case MD_BLOCK_H:        RENDER_LITERAL(r, head[(cast(MD_BLOCK_H_DETAIL*)detail).level - 1]); break;
        case MD_BLOCK_CODE:     RENDER_LITERAL(r, "</code></pre>\n"); break;
        case MD_BLOCK_HTML:     /* noop */ break;
        case MD_BLOCK_P:        RENDER_LITERAL(r, "</p>\n"); break;
        case MD_BLOCK_TABLE:    RENDER_LITERAL(r, "</table>\n"); break;
        case MD_BLOCK_THEAD:    RENDER_LITERAL(r, "</thead>\n"); break;
        case MD_BLOCK_TBODY:    RENDER_LITERAL(r, "</tbody>\n"); break;
        case MD_BLOCK_TR:       RENDER_LITERAL(r, "</tr>\n"); break;
        case MD_BLOCK_TH:       RENDER_LITERAL(r, "</th>\n"); break;
        case MD_BLOCK_TD:       RENDER_LITERAL(r, "</td>\n"); break;
        default: assert(false);
    }

    return 0;
}

/* Parser callback: emit the opening tag for a span. Nothing is emitted while
 * inside an image. Always returns 0. */
int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    if(r.image_nesting_level > 0) {
        /* We are inside an image, i.e. rendering the ALT attribute of
         * <IMG> tag. */
        return 0;
    }

    switch(type) {
        case MD_SPAN_EM:                RENDER_LITERAL(r, "<em>"); break;
        case MD_SPAN_STRONG:            RENDER_LITERAL(r, "<strong>"); break;
        case MD_SPAN_A:                 render_open_a_span(r, cast(MD_SPAN_A_DETAIL*) detail); break;
        case MD_SPAN_IMG:               render_open_img_span(r, cast(MD_SPAN_IMG_DETAIL*) detail); break;
        case MD_SPAN_CODE:              RENDER_LITERAL(r, "<code>"); break;
        case MD_SPAN_DEL:               RENDER_LITERAL(r, "<del>"); break;
        case MD_SPAN_LATEXMATH:         RENDER_LITERAL(r, "<equation>"); break;
        case MD_SPAN_LATEXMATH_DISPLAY: RENDER_LITERAL(r, "<equation type=\"display\">"); break;
        default: assert(false);
    }

    return 0;
}

/* Parser callback: emit the closing tag for a span. While inside an image
 * only the outermost image itself is closed. Always returns 0. */
int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    if(r.image_nesting_level > 0) {
        /* We are inside an image, i.e. rendering the ALT attribute of
         * <IMG> tag. */
        if(r.image_nesting_level == 1  &&  type == MD_SPAN_IMG)
            render_close_img_span(r, cast(MD_SPAN_IMG_DETAIL*) detail);
        return 0;
    }

    switch(type) {
        case MD_SPAN_EM:                RENDER_LITERAL(r, "</em>"); break;
        case MD_SPAN_STRONG:            RENDER_LITERAL(r, "</strong>"); break;
        case MD_SPAN_A:                 RENDER_LITERAL(r, "</a>"); break;
        case MD_SPAN_IMG:               /*noop, handled above*/ break;
        case MD_SPAN_CODE:              RENDER_LITERAL(r, "</code>"); break;
        case MD_SPAN_DEL:               RENDER_LITERAL(r, "</del>"); break;
        case MD_SPAN_LATEXMATH:         /*fall through*/
        case MD_SPAN_LATEXMATH_DISPLAY: RENDER_LITERAL(r, "</equation>"); break;
        default: assert(false);
    }

    return 0;
}

/* Parser callback: render a run of text according to its type. Raw HTML is
 * passed through, entities are translated, everything else is HTML-escaped.
 * Line breaks become a single space while inside an image. Always returns 0. */
int text_callback(MD_TEXTTYPE type, const(MD_CHAR)* text, MD_SIZE size, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    switch(type) {
        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, &render_text); break;
        case MD_TEXT_BR:        RENDER_LITERAL(r, (r.image_nesting_level == 0 ? "<br />\n" : " ")); break;
        case MD_TEXT_SOFTBR:    RENDER_LITERAL(r, (r.image_nesting_level == 0 ? "\n" : " ")); break;
        case MD_TEXT_HTML:      render_text(r, text, size); break;
        case MD_TEXT_ENTITY:    render_entity(r, text, size, &render_html_escaped); break;
        default:                render_html_escaped(r, text, size); break;
    }

    return 0;
}

/* Parser callback: print msg to stderr, prefixed with "MD4C: ", when
 * MD_RENDER_FLAG_DEBUG is set in the renderer flags. */
void debug_log_callback(const(char)* msg, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;
    if(r.flags & MD_RENDER_FLAG_DEBUG)
        fprintf(stderr, "MD4C: %s\n", msg);
}


/* Render Markdown into HTML.
 *
 * Note only contents of <body> tag is generated. Caller must generate
 * HTML header/footer manually before/after calling md_render_html().
 *
 * Params input and input_size specify the Markdown input.
 * Callback process_output() gets called with chunks of HTML output.
 * (Typical implementation may just output the bytes to file or append to
 * some buffer).
 * Param userdata is just propagated back to process_output() callback.
 * Param parser_flags are flags from md4c.h propagated to md_parse().
 * Param render_flags is bitmask of MD_RENDER_FLAG_xxxx.
 *
 * Returns -1 on error (if md_parse() fails.)
 * Returns 0 on success.
 */
int md_render_html(const(MD_CHAR)* input, MD_SIZE input_size,
                   void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output,
                   void* userdata, uint parser_flags, uint renderer_flags)
{
    MD_RENDER_HTML render = MD_RENDER_HTML(process_output, userdata, renderer_flags, 0);
    render.escape_map[] = '\x00';

    MD_PARSER parser = MD_PARSER(
        0,
        parser_flags,
        &enter_block_callback,
        &leave_block_callback,
        &enter_span_callback,
        &leave_span_callback,
        &text_callback,
        &debug_log_callback,
        null
    );

    /* Only these four bytes are escaped by render_html_escaped(). */
    render.escape_map['"'] = 1;
    render.escape_map['&'] = 1;
    render.escape_map['<'] = 1;
    render.escape_map['>'] = 1;

    return md_parse(input, input_size, &parser, cast(void*) &render);
}