commonmarkd source code

1 module commonmarkd;
2 
3 /// Options for Markdown parsing.
4 enum MarkdownFlag : int
5 {
6      collapseWhitespace       = 0x0001,  /** Collapse non-trivial whitespace into single ' ' */
7      permissiveATXHeaders     = 0x0002,  /** Do not require space in ATX headers ( ###header ) */
8      permissiveURLAutoLinks   = 0x0004,  /** Recognize URLs as autolinks even without '<', '>' */
9      permissiveEmailAutoLinks = 0x0008,  /** Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
10      noIndentedCodeBlocks     = 0x0010,  /** Disable indented code blocks. (Only fenced code works.) */
11      noHTMLBlocks             = 0x0020,  /** Disable raw HTML blocks. */
12      noHTMLSpans              = 0x0040,  /** Disable raw HTML (inline). */
13      tablesExtension          = 0x0100,  /** Enable tables extension. */
14      enableStrikeThrough      = 0x0200,  /** Enable strikethrough extension. */
15      permissiveWWWAutoLinks   = 0x0400,  /** Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
16      enableTaskLists          = 0x0800,  /** Enable task list extension. */
17      latexMathSpans           = 0x1000,  /** Enable $ and $$ containing LaTeX equations. */
18 
19      permissiveAutoLinks      = permissiveEmailAutoLinks | permissiveURLAutoLinks | permissiveWWWAutoLinks, /** Recognize e-mails, URL and WWW links */
20      noHTML                   = noHTMLBlocks | noHTMLSpans, /** Disable raw HTML. */
21 
22     /* Convenient sets of flags corresponding to well-known Markdown dialects.
23      *
24      * Note we may only support subset of features of the referred dialect.
25      * The constant just enables those extensions which bring us as close as
26      * possible given what features we implement.
27      *
28      * ABI compatibility note: Meaning of these can change in time as new
29      * extensions, bringing the dialect closer to the original, are implemented.
30      */
31     dialectCommonMark          = 0, /** CommonMark */
32     dialectGitHub              = (permissiveAutoLinks | tablesExtension | enableStrikeThrough | enableTaskLists), /** Github Flavoured Markdown */
33 }
34 
35 deprecated("Use convertMarkdownToHTML instead") alias convertCommonMarkToHTML = convertMarkdownToHTML;
36 
37 /// Parses a Markdown input, returns HTML. `flags` set the particular Markdown dialect that is used.
38 string convertMarkdownToHTML(const(char)[] input, MarkdownFlag flags = MarkdownFlag.dialectCommonMark)
39 {
40     import commonmarkd.md4c;
41     import core.stdc.stdlib;
42 
43     static struct GrowableBuffer
44     {
45     nothrow:
46     @nogc:
47         char* buf = null;
48         size_t size = 0;
49         size_t allocated = 0;
50 
51         void ensureSize(size_t atLeastthisSize)
52         {
53             if (atLeastthisSize > allocated)
54             {
55                 allocated = 2 * allocated + atLeastthisSize + 1; // TODO: enhancing this estimation probably beneficial to performance
56                 buf = cast(char*) realloc(buf, allocated);
57             }
58 
59         }
60 
61         ~this()
62         {
63             if (buf)
64             {
65                 free(buf);
66                 buf = null;
67                 size = 0;
68                 allocated = 0;
69             }
70         }
71 
72         void append(const(char)[] suffix)
73         {
74             size_t L = suffix.length;
75             ensureSize(size + L);            
76             buf[size..size+L] = suffix[0..L];
77             size += L;
78         }
79 
80         const(char)[] getData()
81         {
82             return buf[0..size];
83         }
84 
85         static void appendCallback(const(char)* chars, uint size, void* userData)
86         {
87             GrowableBuffer* gb = cast(GrowableBuffer*) userData;
88             gb.append(chars[0..size]);
89         }
90     }
91 
92     GrowableBuffer gb;
93     gb.ensureSize(input.length); // TODO: enhancing this estimation probably beneficial to performance
94 
95     //int renderFlags = MD_RENDER_FLAG_DEBUG;
96     int renderFlags = 0;
97 
98     int ret = md_render_html(input.ptr, 
99                              cast(uint) input.length,
100                              &GrowableBuffer.appendCallback,
101                              &gb, flags, renderFlags);
102     return gb.getData.idup; // Note: this is the only GC-using stuff
103 }
104 
105 // Execute the CommonMark specification test suite
106 unittest
107 //void main()
108 {
109     import std.file;
110     import std.json;
111     import std.stdio;
112     import std..string;
113 
114     const(char)[] json = cast(char[]) std.file.read("spec-tests.json");
115     JSONValue root = parseJSON(json);
116     assert(root.type() == JSONType.array);
117 
118     JSONValue[] tests = root.array;
119 
120     writefln("%s tests parsed.", tests.length);
121 
122     int numPASS = 0;
123     int numFAIL = 0;
124     for (size_t n = 0; n < tests.length; ++n)
125     {
126         JSONValue test = tests[n];
127         string markdown = test["markdown"].str;
128         string expectedHTML = test["html"].str;
129         long example = test["example"].integer;
130 
131         string html;
132         try
133         {
134             html = convertMarkdownToHTML(markdown, MarkdownFlag.dialectCommonMark);
135         }
136         catch(Throwable t)
137         {
138             html = t.msg;
139         }
140 
141         // Note: It seems Markdown spec says nothing about what line endings should get generated.
142         // So we replace every \r\n by just \n before comparison, else it create bugs depending
143         // on which system CommonMark test suite has been generated.
144         html = html.replace("\r\n", "\n");
145         expectedHTML = expectedHTML.replace("\r\n", "\n");
146 
147         // Poor attempt at HTML normalization
148         html = html.replace("\n", "");
149         expectedHTML = expectedHTML.replace("\n", "");
150 
151         if ( html == expectedHTML )
152         {
153             numPASS++;
154         }
155         else
156         {
157             long start_line = test["start_line"].integer; 
158             long end_line = test["end_line"].integer;
159             string section = test["section"].str;
160             writef("Test %d: ", example);
161             writefln("FAIL\n***Markdown:\n\n%s\n\n*** Expected (length %s):\n\n%s\n\n*** Got instead (length %s):\n\n%s\n\nSee specifications lines %d-%d section %s", 
162                       markdown, expectedHTML.length, expectedHTML, html.length, html, start_line, end_line, section);
163             numFAIL++;
164         }
165     }
166 
167     writefln("%s / %s tests passed.", numPASS, tests.length);
168 
169 }