startgit: Static page generator for git repositories |
git clone git://git.dimitrijedobrota.com/startgit.git |
Log | Files | Refs | README | LICENSE | HACKING | CONTRIBUTING | CODE_OF_CONDUCT | BUILDING |
html.cpp (20079B)
0 #include <array>
1 #include <format>
2 #include <functional>
3 #include <string>
5 #include <md4c-html.h>
7 #include "arguments.hpp"
// Locale-independent check for an ASCII decimal digit ('0' through '9').
constexpr bool isdigit(char chr)
{
  return !(chr < '0' || chr > '9');
}
// Locale-independent check for an ASCII lowercase letter ('a' through 'z').
constexpr bool islower(char chr)
{
  return !(chr < 'a' || chr > 'z');
}
// Locale-independent check for an ASCII uppercase letter ('A' through 'Z').
constexpr bool isupper(char chr)
{
  return !(chr < 'A' || chr > 'Z');
}
24 constexpr bool isalnum(char chr)
25 {
26 return islower(chr) || isupper(chr) || isdigit(chr);
27 }
29 class md_html
30 {
31 public:
32 static bool need_html_esc(char chr)
33 {
34 return escape_map[static_cast<size_t>(chr)] & esc_flag::html; // NOLINT
35 }
37 static bool need_url_esc(char chr)
38 {
39 return escape_map[static_cast<size_t>(chr)] & esc_flag::url; // NOLINT
40 }
42 using append_fn = void (md_html::*)(const MD_CHAR*, MD_SIZE);
44 void render_verbatim(const std::string& text);
45 void render_verbatim(const MD_CHAR* text, MD_SIZE size);
47 void render_html_escaped(const MD_CHAR* data, MD_SIZE size);
48 void render_url_escaped(const MD_CHAR* data, MD_SIZE size);
49 void render_utf8_codepoint(unsigned codepoint, append_fn fn_append);
50 void render_entity(const MD_CHAR* text, MD_SIZE size, append_fn fn_append);
51 void render_attribute(const MD_ATTRIBUTE* attr, append_fn fn_append);
52 void render_open_ol_block(const MD_BLOCK_OL_DETAIL* det);
53 void render_open_li_block(const MD_BLOCK_LI_DETAIL* det);
54 void render_open_code_block(const MD_BLOCK_CODE_DETAIL* det);
55 void render_open_td_block(const MD_CHAR* cell_type,
56 const MD_BLOCK_TD_DETAIL* det);
57 void render_open_a_span(const MD_SPAN_A_DETAIL* det);
58 void render_open_img_span(const MD_SPAN_IMG_DETAIL* det);
59 void render_close_img_span(const MD_SPAN_IMG_DETAIL* det);
60 void render_open_wikilink_span(const MD_SPAN_WIKILINK_DETAIL* det);
62 void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
63 void* userdata;
64 unsigned flags;
65 int image_nesting_level;
67 private:
68 enum esc_flag : unsigned char
69 {
70 html = 0x1U,
71 url = 0x2U
72 };
74 static constexpr const std::array<unsigned char, 256> escape_map = []()
75 {
76 std::array<unsigned char, 256> res = {};
77 const std::string url_esc = "~-_.+!*(),%#@?=;:/,+$";
78 const std::string html_esc = "\"&<>";
80 for (size_t i = 0; i < res.size(); ++i) {
81 const auto chr = static_cast<char>(i);
83 if (html_esc.find(chr) != std::string::npos) {
84 res[i] |= esc_flag::html; // NOLINT
85 }
87 if (!isalnum(chr) && url_esc.find(chr) == std::string::npos) {
88 res[i] |= esc_flag::url; // NOLINT
89 }
90 }
92 return res;
93 }();
94 };
95 /*****************************************
96 *** HTML rendering helper functions ***
97 *****************************************/
99 void md_html::render_verbatim(const MD_CHAR* text, MD_SIZE size) // NOLINT
100 {
101 process_output(text, size, userdata);
102 }
104 void md_html::render_verbatim(const std::string& text) // NOLINT
105 {
106 process_output(text.data(), static_cast<MD_SIZE>(text.size()), userdata);
107 }
109 void md_html::render_html_escaped(const MD_CHAR* data, MD_SIZE size)
110 {
111 MD_OFFSET beg = 0;
112 MD_OFFSET off = 0;
114 while (true) {
115 /* Optimization: Use some loop unrolling. */
116 while (off + 3 < size && !md_html::need_html_esc(data[off + 0]) // NOLINT
117 && !md_html::need_html_esc(data[off + 1]) // NOLINT
118 && !md_html::need_html_esc(data[off + 2]) // NOLINT
119 && !md_html::need_html_esc(data[off + 3])) // NOLINT
120 {
121 off += 4;
122 }
124 while (off < size && !md_html::need_html_esc(data[off])) { // NOLINT
125 off++;
126 }
128 if (off > beg) {
129 render_verbatim(data + beg, off - beg); // NOLINT
130 }
132 if (off < size) {
133 switch (data[off]) { // NOLINT
134 case '&':
135 render_verbatim("&");
136 break;
137 case '<':
138 render_verbatim("<");
139 break;
140 case '>':
141 render_verbatim(">");
142 break;
143 case '"':
144 render_verbatim(""");
145 break;
146 }
147 off++;
148 } else {
149 break;
150 }
151 beg = off;
152 }
153 }
// Rewrites a link target found in a Markdown document so that it points into
// the generated site.
//
// - Targets starting with "http" or "www" are only touched when they contain
//   "github.com/" + args.github; that prefix is then replaced by
//   args.base_url. A "/blob" segment, if present, is dropped and the link is
//   turned into either the page of a "special" file or a "/file/....html"
//   page; without "/blob" the link gets "/master/log.html" appended.
// - Every other target is treated as a path inside the repository: entries
//   found in args.special map to "./<name without extension>.html", anything
//   else to "./file/<target>.html".
//
// NOTE(review): the exact types of args.github, args.base_url and
// args.special come from arguments.hpp, which is not visible here.
std::string translate_url(const MD_CHAR* data, MD_SIZE size)
{
  auto url = std::string(data, size);

  // rfind(prefix, 0) can only match at index 0, so this is a "starts with".
  const bool absolute = url.rfind("http", 0) != std::string::npos
      || url.rfind("www", 0) != std::string::npos;

  if (!absolute) {
    auto itr = startgit::args.special.find(url);
    if (itr != startgit::args.special.end()) {
      auto cpy = *itr;
      return std::format("./{}.html", cpy.replace_extension().string());
    }

    return std::format("./file/{}.html", url);
  }

  const std::string github = "github.com/" + startgit::args.github;
  const std::size_t gpos = url.find(github);
  if (gpos == std::string::npos) {
    // Link to somewhere else on the web: leave it alone.
    return url;
  }

  url = startgit::args.base_url + url.substr(gpos + github.size());

  static const std::string blob = "/blob";
  const std::size_t bpos = url.find(blob);
  if (bpos == std::string::npos) {
    url += "/master/log.html";
    return url;
  }

  url.replace(bpos, blob.size(), "");

  const std::size_t rslash = url.rfind('/');

  auto itr = startgit::args.special.find(url.substr(rslash + 1));
  if (itr != startgit::args.special.end()) {
    auto cpy = *itr;
    url = std::format("{}/{}.html",
                      url.substr(0, rslash),
                      cpy.replace_extension().string());
    return url;
  }

  // After "/blob" is removed, url[bpos] is the slash in front of the next
  // segment; the slash after that segment is where "/file/" is spliced in.
  const std::size_t slash = url.find('/', bpos + 1);
  if (slash != std::string::npos) {
    url.replace(slash, 1, "/file/");
    url += ".html";
  }
  // Otherwise there is no path after that segment (e.g. ".../blob/master").
  // std::string::replace would throw std::out_of_range for npos, so the
  // link is left as it is instead of aborting the whole render.

  return url;
}
202 void md_html::render_url_escaped(const MD_CHAR* data, MD_SIZE size)
203 {
204 static const MD_CHAR* hex_chars = "0123456789ABCDEF";
205 MD_OFFSET beg = 0;
206 MD_OFFSET off = 0;
208 const auto url = translate_url(data, size);
209 size = static_cast<unsigned>(url.size());
210 data = url.data();
212 while (true) {
213 while (off < size && !md_html::need_url_esc(data[off])) { // NOLINT
214 off++;
215 }
217 if (off > beg) {
218 render_verbatim(data + beg, off - beg); // NOLINT
219 }
221 if (off < size) {
222 std::array<char, 3> hex = {0};
224 switch (data[off]) { // NOLINT
225 case '&':
226 render_verbatim("&");
227 break;
228 default:
229 hex[0] = '%';
230 hex[1] = hex_chars[(static_cast<unsigned>(data[off]) >> 4) // NOLINT
231 & 0xf]; // NOLINT
232 hex[2] = hex_chars[(static_cast<unsigned>(data[off]) >> 0) // NOLINT
233 & 0xf]; // NOLINT
234 render_verbatim(hex.data(), 3);
235 break;
236 }
237 off++;
238 } else {
239 break;
240 }
242 beg = off;
243 }
244 }
// Numeric value of a hexadecimal digit character. Digits map to 0-9; any
// character in 'A'..'Z' is measured from 'A' and everything else from 'a',
// each plus 10. No validation is performed: the caller must pass a hex digit.
unsigned hex_val(char chr)
{
  // Character that corresponds to the value zero for the detected range.
  int zero = 'a' - 10;

  if (chr >= '0' && chr <= '9') {
    zero = '0';
  } else if (chr >= 'A' && chr <= 'Z') {
    zero = 'A' - 10;
  }

  return static_cast<unsigned>(chr - zero);  // NOLINT
}
259 // NOLINTBEGIN
260 void md_html::render_utf8_codepoint(unsigned codepoint, append_fn fn_append)
261 {
262 static const MD_CHAR utf8_replacement_char[] = {
263 char(0xef), char(0xbf), char(0xbd)};
265 unsigned char utf8[4];
266 size_t n;
268 if (codepoint <= 0x7f) {
269 n = 1;
270 utf8[0] = static_cast<unsigned char>(codepoint);
271 } else if (codepoint <= 0x7ff) {
272 n = 2;
273 utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
274 utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
275 } else if (codepoint <= 0xffff) {
276 n = 3;
277 utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
278 utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
279 utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
280 } else {
281 n = 4;
282 utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
283 utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
284 utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
285 utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
286 }
288 if (0 < codepoint && codepoint <= 0x10ffff) {
289 std::invoke(fn_append,
290 this,
291 reinterpret_cast<char*>(utf8),
292 static_cast<MD_SIZE>(n)); // NOLINT
293 } else {
294 std::invoke(fn_append, this, utf8_replacement_char, 3);
295 }
296 }
297 // NOLINTEND
299 /* Translate entity to its UTF-8 equivalent, or output the verbatim one
300 * if such entity is unknown (or if the translation is disabled). */
301 void md_html::render_entity(const MD_CHAR* text,
302 MD_SIZE size,
303 append_fn fn_append)
304 {
305 /* We assume UTF-8 output is what is desired. */
306 if (size > 3 && text[1] == '#') { // NOLINT
307 unsigned codepoint = 0;
309 if (text[2] == 'x' || text[2] == 'X') { // NOLINT
310 /* Hexadecimal entity (e.g. "�")). */
311 for (MD_SIZE idx = 3; idx < size - 1; idx++) {
312 codepoint = 16 * codepoint + hex_val(text[idx]); // NOLINT
313 }
314 } else {
315 /* Decimal entity (e.g. "&1234;") */
316 for (MD_SIZE idx = 2; idx < size - 1; idx++) {
317 codepoint =
318 10 * codepoint + static_cast<unsigned>(text[idx] - '0'); // NOLINT
319 }
320 }
322 render_utf8_codepoint(codepoint, fn_append);
323 return;
324 }
326 std::invoke(fn_append, this, text, size);
327 }
329 void md_html::render_attribute(const MD_ATTRIBUTE* attr, append_fn fn_append)
330 {
331 for (int i = 0; attr->substr_offsets[i] < attr->size; i++) { // NOLINT
332 MD_TEXTTYPE type = attr->substr_types[i]; // NOLINT
333 MD_OFFSET off = attr->substr_offsets[i]; // NOLINT
334 MD_SIZE size = attr->substr_offsets[i + 1] - off; // NOLINT
335 const MD_CHAR* text = attr->text + off; // NOLINT
337 switch (type) {
338 case MD_TEXT_NULLCHAR:
339 render_utf8_codepoint(0x0000, &md_html::render_verbatim);
340 break;
341 case MD_TEXT_ENTITY:
342 render_entity(text, size, fn_append);
343 break;
344 default:
345 std::invoke(fn_append, this, text, size);
346 break;
347 }
348 }
349 }
// Opens an ordered list. A list starting at 1 gets a plain <ol>; any other
// start value is written into the `start` attribute. Both forms end with a
// newline.
void md_html::render_open_ol_block(const MD_BLOCK_OL_DETAIL* det)
{
  if (det->start == 1) {
    render_verbatim("<ol>\n");
    return;
  }

  // Deliberately an ordinary string literal: inside a raw string literal
  // "\n" would be emitted as a backslash followed by 'n', not as a newline.
  render_verbatim(std::format("<ol start=\"{}\">\n", det->start));
}
362 void md_html::render_open_li_block(const MD_BLOCK_LI_DETAIL* det)
363 {
364 if (det->is_task != 0) {
365 render_verbatim(
366 "<li class=\"task-list-item\">"
367 "<input type=\"checkbox\" "
368 "class=\"task-list-item-checkbox\" disabled");
369 if (det->task_mark == 'x' || det->task_mark == 'X') {
370 render_verbatim(" checked");
371 }
372 render_verbatim(">");
373 } else {
374 render_verbatim("<li>");
375 }
376 }
378 void md_html::render_open_code_block(const MD_BLOCK_CODE_DETAIL* det)
379 {
380 render_verbatim("<pre><code");
382 /* If known, output the HTML 5 attribute class="language-LANGNAME". */
383 if (det->lang.text != nullptr) {
384 render_verbatim(" class=\"language-");
385 render_attribute(&det->lang, &md_html::render_html_escaped);
386 render_verbatim("\"");
387 }
389 render_verbatim(">");
390 }
392 void md_html::render_open_td_block(const MD_CHAR* cell_type,
393 const MD_BLOCK_TD_DETAIL* det)
394 {
395 render_verbatim("<");
396 render_verbatim(cell_type);
398 switch (det->align) {
399 case MD_ALIGN_LEFT:
400 render_verbatim(" align=\"left\">");
401 break;
402 case MD_ALIGN_CENTER:
403 render_verbatim(" align=\"center\">");
404 break;
405 case MD_ALIGN_RIGHT:
406 render_verbatim(" align=\"right\">");
407 break;
408 default:
409 render_verbatim(">");
410 break;
411 }
412 }
414 void md_html::render_open_a_span(const MD_SPAN_A_DETAIL* det)
415 {
416 render_verbatim("<a href=\"");
417 render_attribute(&det->href, &md_html::render_url_escaped);
419 if (det->title.text != nullptr) {
420 render_verbatim("\" title=\"");
421 render_attribute(&det->title, &md_html::render_html_escaped);
422 }
424 render_verbatim("\">");
425 }
427 void md_html::render_open_img_span(const MD_SPAN_IMG_DETAIL* det)
428 {
429 render_verbatim("<img src=\"");
430 render_attribute(&det->src, &md_html::render_url_escaped);
432 render_verbatim("\" alt=\"");
433 }
435 void md_html::render_close_img_span(const MD_SPAN_IMG_DETAIL* det)
436 {
437 if (det->title.text != nullptr) {
438 render_verbatim("\" title=\"");
439 render_attribute(&det->title, &md_html::render_html_escaped);
440 }
442 render_verbatim("\">");
443 }
445 void md_html::render_open_wikilink_span(const MD_SPAN_WIKILINK_DETAIL* det)
446 {
447 render_verbatim("<x-wikilink data-target=\"");
448 render_attribute(&det->target, &md_html::render_html_escaped);
450 render_verbatim("\">");
451 }
453 /**************************************
454 *** HTML renderer implementation ***
455 **************************************/
457 int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
458 {
459 static const MD_CHAR* head[] = {// NOLINT
460 "<h1>",
461 "<h2>",
462 "<h3>",
463 "<h4>",
464 "<h5>",
465 "<h6>"};
466 auto* data = static_cast<class md_html*>(userdata);
468 switch (type) {
469 case MD_BLOCK_DOC: /* noop */
470 break;
471 case MD_BLOCK_QUOTE:
472 data->render_verbatim("<blockquote>\n");
473 break;
474 case MD_BLOCK_UL:
475 data->render_verbatim("<ul>\n");
476 break;
477 case MD_BLOCK_OL:
478 data->render_open_ol_block(
479 static_cast<const MD_BLOCK_OL_DETAIL*>(detail));
480 break;
481 case MD_BLOCK_LI:
482 data->render_open_li_block(
483 static_cast<const MD_BLOCK_LI_DETAIL*>(detail));
484 break;
485 case MD_BLOCK_HR:
486 data->render_verbatim("<hr>\n");
487 break;
488 case MD_BLOCK_H:
489 data->render_verbatim(
490 head[static_cast<MD_BLOCK_H_DETAIL*>(detail)->level - 1]); // NOLINT
491 break;
492 case MD_BLOCK_CODE:
493 data->render_open_code_block(
494 static_cast<const MD_BLOCK_CODE_DETAIL*>(detail));
495 break;
496 case MD_BLOCK_HTML: /* noop */
497 break;
498 case MD_BLOCK_P:
499 data->render_verbatim("<p>");
500 break;
501 case MD_BLOCK_TABLE:
502 data->render_verbatim("<table>\n");
503 break;
504 case MD_BLOCK_THEAD:
505 data->render_verbatim("<thead>\n");
506 break;
507 case MD_BLOCK_TBODY:
508 data->render_verbatim("<tbody>\n");
509 break;
510 case MD_BLOCK_TR:
511 data->render_verbatim("<tr>\n");
512 break;
513 case MD_BLOCK_TH:
514 data->render_open_td_block("th",
515 static_cast<MD_BLOCK_TD_DETAIL*>(detail));
516 break;
517 case MD_BLOCK_TD:
518 data->render_open_td_block("td",
519 static_cast<MD_BLOCK_TD_DETAIL*>(detail));
520 break;
521 }
523 return 0;
524 }
526 int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
527 {
528 static const MD_CHAR* head[] = {// NOLINT
529 "</h1>\n",
530 "</h2>\n",
531 "</h3>\n",
532 "</h4>\n",
533 "</h5>\n",
534 "</h6>\n"};
535 auto* data = static_cast<class md_html*>(userdata);
537 switch (type) {
538 case MD_BLOCK_DOC: /*noop*/
539 break;
540 case MD_BLOCK_QUOTE:
541 data->render_verbatim("</blockquote>\n");
542 break;
543 case MD_BLOCK_UL:
544 data->render_verbatim("</ul>\n");
545 break;
546 case MD_BLOCK_OL:
547 data->render_verbatim("</ol>\n");
548 break;
549 case MD_BLOCK_LI:
550 data->render_verbatim("</li>\n");
551 break;
552 case MD_BLOCK_HR: /*noop*/
553 break;
554 case MD_BLOCK_H:
555 data->render_verbatim(
556 head[static_cast<MD_BLOCK_H_DETAIL*>(detail)->level - 1]); // NOLINT
557 break;
558 case MD_BLOCK_CODE:
559 data->render_verbatim("</code></pre>\n");
560 break;
561 case MD_BLOCK_HTML: /* noop */
562 break;
563 case MD_BLOCK_P:
564 data->render_verbatim("</p>\n");
565 break;
566 case MD_BLOCK_TABLE:
567 data->render_verbatim("</table>\n");
568 break;
569 case MD_BLOCK_THEAD:
570 data->render_verbatim("</thead>\n");
571 break;
572 case MD_BLOCK_TBODY:
573 data->render_verbatim("</tbody>\n");
574 break;
575 case MD_BLOCK_TR:
576 data->render_verbatim("</tr>\n");
577 break;
578 case MD_BLOCK_TH:
579 data->render_verbatim("</th>\n");
580 break;
581 case MD_BLOCK_TD:
582 data->render_verbatim("</td>\n");
583 break;
584 }
586 return 0;
587 }
589 int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
590 {
591 auto* data = static_cast<class md_html*>(userdata);
592 const bool inside_img = (data->image_nesting_level > 0);
594 if (type == MD_SPAN_IMG) {
595 data->image_nesting_level++;
596 }
598 if (inside_img) {
599 return 0;
600 }
602 switch (type) {
603 case MD_SPAN_EM:
604 data->render_verbatim("<em>");
605 break;
606 case MD_SPAN_STRONG:
607 data->render_verbatim("<strong>");
608 break;
609 case MD_SPAN_U:
610 data->render_verbatim("<u>");
611 break;
612 case MD_SPAN_A:
613 data->render_open_a_span(static_cast<MD_SPAN_A_DETAIL*>(detail));
614 break;
615 case MD_SPAN_IMG:
616 data->render_open_img_span(static_cast<MD_SPAN_IMG_DETAIL*>(detail));
617 break;
618 case MD_SPAN_CODE:
619 data->render_verbatim("<code>");
620 break;
621 case MD_SPAN_DEL:
622 data->render_verbatim("<del>");
623 break;
624 case MD_SPAN_LATEXMATH:
625 data->render_verbatim("<x-equation>");
626 break;
627 case MD_SPAN_LATEXMATH_DISPLAY:
628 data->render_verbatim("<x-equation type=\"display\">");
629 break;
630 case MD_SPAN_WIKILINK:
631 data->render_open_wikilink_span(
632 static_cast<MD_SPAN_WIKILINK_DETAIL*>(detail));
633 break;
634 }
636 return 0;
637 }
639 int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
640 {
641 auto* data = static_cast<class md_html*>(userdata);
643 if (type == MD_SPAN_IMG) {
644 data->image_nesting_level--;
645 }
647 if (data->image_nesting_level > 0) {
648 return 0;
649 }
651 switch (type) {
652 case MD_SPAN_EM:
653 data->render_verbatim("</em>");
654 break;
655 case MD_SPAN_STRONG:
656 data->render_verbatim("</strong>");
657 break;
658 case MD_SPAN_U:
659 data->render_verbatim("</u>");
660 break;
661 case MD_SPAN_A:
662 data->render_verbatim("</a>");
663 break;
664 case MD_SPAN_IMG:
665 data->render_close_img_span(static_cast<MD_SPAN_IMG_DETAIL*>(detail));
666 break;
667 case MD_SPAN_CODE:
668 data->render_verbatim("</code>");
669 break;
670 case MD_SPAN_DEL:
671 data->render_verbatim("</del>");
672 break;
673 case MD_SPAN_LATEXMATH: /*fall through*/
674 case MD_SPAN_LATEXMATH_DISPLAY:
675 data->render_verbatim("</x-equation>");
676 break;
677 case MD_SPAN_WIKILINK:
678 data->render_verbatim("</x-wikilink>");
679 break;
680 }
682 return 0;
683 }
685 int text_callback(MD_TEXTTYPE type,
686 const MD_CHAR* text,
687 MD_SIZE size,
688 void* userdata)
689 {
690 auto* data = static_cast<class md_html*>(userdata);
692 switch (type) {
693 case MD_TEXT_NULLCHAR:
694 data->render_utf8_codepoint(0x0000, &md_html::render_verbatim);
695 break;
696 case MD_TEXT_BR:
697 data->render_verbatim(
698 (data->image_nesting_level == 0 ? ("<br>\n") : " "));
699 break;
700 case MD_TEXT_SOFTBR:
701 data->render_verbatim((data->image_nesting_level == 0 ? "\n" : " "));
702 break;
703 case MD_TEXT_HTML:
704 data->render_verbatim(text, size);
705 break;
706 case MD_TEXT_ENTITY:
707 data->render_entity(text, size, &md_html::render_html_escaped);
708 break;
709 default:
710 data->render_html_escaped(text, size);
711 break;
712 }
714 return 0;
715 }
717 namespace startgit
718 {
720 int md_html(const MD_CHAR* input,
721 MD_SIZE input_size,
722 void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
723 void* userdata,
724 unsigned parser_flags,
725 unsigned renderer_flags)
726 {
727 class md_html render = {process_output, userdata, renderer_flags, 0};
729 const MD_PARSER parser = {0,
730 parser_flags,
731 enter_block_callback,
732 leave_block_callback,
733 enter_span_callback,
734 leave_span_callback,
735 text_callback,
736 nullptr,
737 nullptr};
739 return md_parse(input, input_size, &parser, &render);
740 }
742 } // namespace startgit