startgit

Static page generator for git repositories
git clone git://git.dimitrijedobrota.com/startgit.git
Log | Files | Refs | README | LICENSE | HACKING | CONTRIBUTING | CODE_OF_CONDUCT | BUILDING

html.cpp (19813B)


0 #include <array>
1 #include <format>
2 #include <functional>
3 #include <string>
5 #include <md4c-html.h>
7 #include "arguments.hpp"
/* Locale-independent check for an ASCII decimal digit ('0' through '9'). */
constexpr bool isdigit(char chr)
{
  return chr >= '0' && chr <= '9';
}
/* Locale-independent check for an ASCII lowercase letter ('a' through 'z'). */
constexpr bool islower(char chr)
{
  return chr >= 'a' && chr <= 'z';
}
/* Locale-independent check for an ASCII uppercase letter ('A' through 'Z'). */
constexpr bool isupper(char chr)
{
  return chr >= 'A' && chr <= 'Z';
}
24 constexpr bool isalnum(char chr)
25 {
26 return islower(chr) || isupper(chr) || isdigit(chr);
27 }
29 class md_html
30 {
31 public:
32 static bool need_html_esc(char chr)
33 {
34 return escape_map[static_cast<size_t>(chr)] & esc_flag::html; // NOLINT
35 }
37 static bool need_url_esc(char chr)
38 {
39 return escape_map[static_cast<size_t>(chr)] & esc_flag::url; // NOLINT
40 }
42 using append_fn = void (md_html::*)(const MD_CHAR*, MD_SIZE);
44 void render_verbatim(const std::string& text);
45 void render_verbatim(const MD_CHAR* text, MD_SIZE size);
47 void render_html_escaped(const MD_CHAR* data, MD_SIZE size);
48 void render_url_escaped(const MD_CHAR* data, MD_SIZE size);
49 void render_utf8_codepoint(unsigned codepoint, append_fn fn_append);
50 void render_entity(const MD_CHAR* text, MD_SIZE size, append_fn fn_append);
51 void render_attribute(const MD_ATTRIBUTE* attr, append_fn fn_append);
52 void render_open_ol_block(const MD_BLOCK_OL_DETAIL* det);
53 void render_open_li_block(const MD_BLOCK_LI_DETAIL* det);
54 void render_open_code_block(const MD_BLOCK_CODE_DETAIL* det);
55 void render_open_td_block(
56 const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det
57 );
58 void render_open_a_span(const MD_SPAN_A_DETAIL* det);
59 void render_open_img_span(const MD_SPAN_IMG_DETAIL* det);
60 void render_close_img_span(const MD_SPAN_IMG_DETAIL* det);
61 void render_open_wikilink_span(const MD_SPAN_WIKILINK_DETAIL* det);
63 void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
64 void* userdata;
65 unsigned flags;
66 int image_nesting_level;
68 private:
69 enum esc_flag : unsigned char
70 {
71 html = 0x1U,
72 url = 0x2U
73 };
75 static constexpr const std::array<unsigned char, 256> escape_map = []()
76 {
77 std::array<unsigned char, 256> res = {};
78 const std::string url_esc = "~-_.+!*(),%#@?=;:/,+$";
79 const std::string html_esc = "\"&<>";
81 for (size_t i = 0; i < res.size(); ++i) {
82 const auto chr = static_cast<char>(i);
84 if (html_esc.find(chr) != std::string::npos) {
85 res[i] |= esc_flag::html; // NOLINT
86 }
88 if (!isalnum(chr) && url_esc.find(chr) == std::string::npos) {
89 res[i] |= esc_flag::url; // NOLINT
90 }
91 }
93 return res;
94 }();
95 };
96 /*****************************************
97 *** HTML rendering helper functions ***
98 *****************************************/
100 void md_html::render_verbatim(const MD_CHAR* text, MD_SIZE size) // NOLINT
102 process_output(text, size, userdata);
105 void md_html::render_verbatim(const std::string& text) // NOLINT
107 process_output(text.data(), static_cast<MD_SIZE>(text.size()), userdata);
110 void md_html::render_html_escaped(const MD_CHAR* data, MD_SIZE size)
112 MD_OFFSET beg = 0;
113 MD_OFFSET off = 0;
115 while (true) {
116 /* Optimization: Use some loop unrolling. */
117 while (off + 3 < size && !md_html::need_html_esc(data[off + 0]) // NOLINT
118 && !md_html::need_html_esc(data[off + 1]) // NOLINT
119 && !md_html::need_html_esc(data[off + 2]) // NOLINT
120 && !md_html::need_html_esc(data[off + 3])) // NOLINT
122 off += 4;
125 while (off < size && !md_html::need_html_esc(data[off])) { // NOLINT
126 off++;
129 if (off > beg) {
130 render_verbatim(data + beg, off - beg); // NOLINT
133 if (off < size) {
134 switch (data[off]) { // NOLINT
135 case '&':
136 render_verbatim("&amp;");
137 break;
138 case '<':
139 render_verbatim("&lt;");
140 break;
141 case '>':
142 render_verbatim("&gt;");
143 break;
144 case '"':
145 render_verbatim("&quot;");
146 break;
148 off++;
149 } else {
150 break;
152 beg = off;
/* Rewrite a link target found in markdown so it points into the generated
 * static site.
 *
 * - Targets that do not start with "http" or "www" are treated as paths
 *   inside the repository: names found in `startgit::args.special` become
 *   "./<name without extension>.html", everything else "./file/<url>.html".
 * - Targets starting with "http"/"www" that contain
 *   "github.com/<startgit::args.github>" are re-rooted at
 *   `startgit::args.base_url`; a "/blob" component is removed and the rest is
 *   mapped onto the generated pages.
 * - Any other absolute target is returned unchanged.
 *
 * Returns the rewritten URL. */
std::string translate_url(const MD_CHAR* data, MD_SIZE size)
{
  auto url = std::string(data, size);

  const bool is_absolute = url.starts_with("http") || url.starts_with("www");
  if (!is_absolute) {
    auto itr = startgit::args.special.find(url);
    if (itr != startgit::args.special.end()) {
      auto cpy = *itr;
      return std::format("./{}.html", cpy.replace_extension().string());
    }

    return std::format("./file/{}.html", url);
  }

  const std::string github = "github.com/" + startgit::args.github;
  const std::size_t gpos = url.find(github);
  if (gpos == std::string::npos) {
    /* Link to some other site: leave it alone. */
    return url;
  }

  url = startgit::args.base_url + url.substr(gpos + github.size());

  static const std::string blob = "/blob";
  const std::size_t bpos = url.find(blob);
  if (bpos == std::string::npos) {
    /* Link to the repository itself rather than to a file in it. */
    url += "/master/log.html";
    return url;
  }

  url.erase(bpos, blob.size());

  const std::size_t rslash = url.rfind('/');

  auto itr = startgit::args.special.find(url.substr(rslash + 1));
  if (itr != startgit::args.special.end()) {
    auto cpy = *itr;
    return std::format(
        "{}/{}.html",
        url.substr(0, rslash),
        cpy.replace_extension().string()
    );
  }

  /* bpos now indexes the '/' that followed "/blob"; the next '/' separates
   * that component from the file path. */
  const std::size_t slash = url.find('/', bpos + 1);
  if (slash == std::string::npos) {
    /* No file path after "/blob/<component>". std::string::replace would
     * throw std::out_of_range for npos, so link to that component's log
     * page instead.
     * NOTE(review): fallback target chosen by analogy with the
     * "/master/log.html" case above - confirm it matches the site layout. */
    url += "/log.html";
    return url;
  }

  url.replace(slash, 1, "/file/");
  url += ".html";

  return url;
}
205 void md_html::render_url_escaped(const MD_CHAR* data, MD_SIZE size)
207 static const MD_CHAR* hex_chars = "0123456789ABCDEF";
208 MD_OFFSET beg = 0;
209 MD_OFFSET off = 0;
211 const auto urll = translate_url(data, size);
212 size = static_cast<unsigned>(urll.size());
213 data = urll.data();
215 while (true) {
216 while (off < size && !md_html::need_url_esc(data[off])) { // NOLINT
217 off++;
220 if (off > beg) {
221 render_verbatim(data + beg, off - beg); // NOLINT
224 if (off < size) {
225 std::array<char, 3> hex = {0};
227 if (data[off] == '&') { // NOLINT
228 render_verbatim("&amp;");
229 } else {
230 hex[0] = '%';
231 hex[1] = hex_chars // NOLINT
232 [(static_cast<unsigned>(data[off]) >> 4) // NOLINT
233 & 0xf]; // NOLINT
234 hex[2] = hex_chars // NOLINT
235 [(static_cast<unsigned>(data[off]) >> 0) // NOLINT
236 & 0xf]; // NOLINT
237 render_verbatim(hex.data(), 3);
239 off++;
240 } else {
241 break;
244 beg = off;
/* Numeric value of a hexadecimal digit character.
 *
 * '0'..'9' map to 0..9. Characters in 'A'..'Z' are measured from 'A' and
 * every other character from 'a', with 10 added in both cases; the caller
 * is expected to pass only valid hex digits. */
unsigned hex_val(char chr)
{
  const bool is_decimal = chr >= '0' && chr <= '9';
  if (is_decimal) {
    return static_cast<unsigned>(chr - '0'); // NOLINT
  }

  const bool is_upper = chr >= 'A' && chr <= 'Z';
  const char base = is_upper ? 'A' : 'a';
  return static_cast<unsigned>(chr - base + 10); // NOLINT
}
261 // NOLINTBEGIN
262 void md_html::render_utf8_codepoint(unsigned codepoint, append_fn fn_append)
264 static const MD_CHAR utf8_replacement_char[] = {
265 char(0xef), char(0xbf), char(0xbd)
266 };
268 unsigned char utf8[4];
269 size_t n;
271 if (codepoint <= 0x7f) {
272 n = 1;
273 utf8[0] = static_cast<unsigned char>(codepoint);
274 } else if (codepoint <= 0x7ff) {
275 n = 2;
276 utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
277 utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
278 } else if (codepoint <= 0xffff) {
279 n = 3;
280 utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
281 utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
282 utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
283 } else {
284 n = 4;
285 utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
286 utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
287 utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
288 utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
291 if (0 < codepoint && codepoint <= 0x10ffff) {
292 std::invoke(
293 fn_append,
294 this,
295 reinterpret_cast<char*>(utf8),
296 static_cast<MD_SIZE>(n)
297 ); // NOLINT
298 } else {
299 std::invoke(fn_append, this, utf8_replacement_char, 3);
302 // NOLINTEND
304 /* Translate entity to its UTF-8 equivalent, or output the verbatim one
305 * if such entity is unknown (or if the translation is disabled). */
306 void md_html::render_entity(
307 const MD_CHAR* text, MD_SIZE size, append_fn fn_append
310 /* We assume UTF-8 output is what is desired. */
311 if (size > 3 && text[1] == '#') { // NOLINT
312 unsigned codepoint = 0;
314 if (text[2] == 'x' || text[2] == 'X') { // NOLINT
315 /* Hexadecimal entity (e.g. "&#x1234abcd;")). */
316 for (MD_SIZE idx = 3; idx < size - 1; idx++) {
317 codepoint = 16 * codepoint + hex_val(text[idx]); // NOLINT
319 } else {
320 /* Decimal entity (e.g. "&1234;") */
321 for (MD_SIZE idx = 2; idx < size - 1; idx++) {
322 codepoint =
323 10 * codepoint + static_cast<unsigned>(text[idx] - '0'); // NOLINT
327 render_utf8_codepoint(codepoint, fn_append);
328 return;
331 std::invoke(fn_append, this, text, size);
334 void md_html::render_attribute(const MD_ATTRIBUTE* attr, append_fn fn_append)
336 for (int i = 0; attr->substr_offsets[i] < attr->size; i++) { // NOLINT
337 MD_TEXTTYPE type = attr->substr_types[i]; // NOLINT
338 MD_OFFSET off = attr->substr_offsets[i]; // NOLINT
339 MD_SIZE size = attr->substr_offsets[i + 1] - off; // NOLINT
340 const MD_CHAR* text = attr->text + off; // NOLINT
342 if (type == MD_TEXT_NULLCHAR) {
343 render_utf8_codepoint(0x0000, &md_html::render_verbatim);
344 continue;
347 if (type == MD_TEXT_ENTITY) {
348 render_entity(text, size, fn_append);
349 continue;
352 std::invoke(fn_append, this, text, size);
/* Emit the opening tag of an ordered list, followed by a newline.
 *
 * Lists that start at 1 get a plain "<ol>"; any other start value is written
 * into a `start` attribute. */
void md_html::render_open_ol_block(const MD_BLOCK_OL_DETAIL* det)
{
  if (det->start == 1) {
    render_verbatim("<ol>\n");
    return;
  }

  // Ordinary (non-raw) literal on purpose: in a raw string literal "\n" is a
  // backslash followed by 'n', which would be written into the HTML as-is.
  const auto buf = std::format("<ol start=\"{}\">\n", det->start);
  render_verbatim(buf);
}
367 void md_html::render_open_li_block(const MD_BLOCK_LI_DETAIL* det)
369 if (det->is_task != 0) {
370 render_verbatim(
371 "<li class=\"task-list-item\">"
372 "<input type=\"checkbox\" "
373 "class=\"task-list-item-checkbox\" disabled"
374 );
375 if (det->task_mark == 'x' || det->task_mark == 'X') {
376 render_verbatim(" checked");
378 render_verbatim(">");
379 } else {
380 render_verbatim("<li>");
384 void md_html::render_open_code_block(const MD_BLOCK_CODE_DETAIL* det)
386 render_verbatim("<pre><code");
388 /* If known, output the HTML 5 attribute class="language-LANGNAME". */
389 if (det->lang.text != nullptr) {
390 render_verbatim(" class=\"language-");
391 render_attribute(&det->lang, &md_html::render_html_escaped);
392 render_verbatim("\"");
395 render_verbatim(">");
398 void md_html::render_open_td_block(
399 const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det
402 render_verbatim("<");
403 render_verbatim(cell_type);
405 switch (det->align) {
406 case MD_ALIGN_LEFT:
407 render_verbatim(" align=\"left\">");
408 break;
409 case MD_ALIGN_CENTER:
410 render_verbatim(" align=\"center\">");
411 break;
412 case MD_ALIGN_RIGHT:
413 render_verbatim(" align=\"right\">");
414 break;
415 case MD_ALIGN_DEFAULT:
416 render_verbatim(">");
417 break;
421 void md_html::render_open_a_span(const MD_SPAN_A_DETAIL* det)
423 render_verbatim("<a href=\"");
424 render_attribute(&det->href, &md_html::render_url_escaped);
426 if (det->title.text != nullptr) {
427 render_verbatim("\" title=\"");
428 render_attribute(&det->title, &md_html::render_html_escaped);
431 render_verbatim("\">");
434 void md_html::render_open_img_span(const MD_SPAN_IMG_DETAIL* det)
436 render_verbatim("<img src=\"");
437 render_attribute(&det->src, &md_html::render_url_escaped);
439 render_verbatim("\" alt=\"");
442 void md_html::render_close_img_span(const MD_SPAN_IMG_DETAIL* det)
444 if (det->title.text != nullptr) {
445 render_verbatim("\" title=\"");
446 render_attribute(&det->title, &md_html::render_html_escaped);
449 render_verbatim("\">");
452 void md_html::render_open_wikilink_span(const MD_SPAN_WIKILINK_DETAIL* det)
454 render_verbatim("<x-wikilink data-target=\"");
455 render_attribute(&det->target, &md_html::render_html_escaped);
457 render_verbatim("\">");
460 /**************************************
461 *** HTML renderer implementation ***
462 **************************************/
464 int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
466 static const MD_CHAR* head[] = {// NOLINT
467 "<h1>",
468 "<h2>",
469 "<h3>",
470 "<h4>",
471 "<h5>",
472 "<h6>"
473 };
474 auto* data = static_cast<class md_html*>(userdata);
476 switch (type) {
477 case MD_BLOCK_DOC: /* noop */
478 break;
479 case MD_BLOCK_QUOTE:
480 data->render_verbatim("<blockquote>\n");
481 break;
482 case MD_BLOCK_UL:
483 data->render_verbatim("<ul>\n");
484 break;
485 case MD_BLOCK_OL:
486 data->render_open_ol_block(static_cast<const MD_BLOCK_OL_DETAIL*>(detail)
487 );
488 break;
489 case MD_BLOCK_LI:
490 data->render_open_li_block(static_cast<const MD_BLOCK_LI_DETAIL*>(detail)
491 );
492 break;
493 case MD_BLOCK_HR:
494 data->render_verbatim("<hr>\n");
495 break;
496 case MD_BLOCK_H:
497 data->render_verbatim(
498 head[static_cast<MD_BLOCK_H_DETAIL*>(detail)->level - 1] // NOLINT
499 );
500 break;
501 case MD_BLOCK_CODE:
502 data->render_open_code_block(
503 static_cast<const MD_BLOCK_CODE_DETAIL*>(detail)
504 );
505 break;
506 case MD_BLOCK_HTML: /* noop */
507 break;
508 case MD_BLOCK_P:
509 data->render_verbatim("<p>");
510 break;
511 case MD_BLOCK_TABLE:
512 data->render_verbatim("<table>\n");
513 break;
514 case MD_BLOCK_THEAD:
515 data->render_verbatim("<thead>\n");
516 break;
517 case MD_BLOCK_TBODY:
518 data->render_verbatim("<tbody>\n");
519 break;
520 case MD_BLOCK_TR:
521 data->render_verbatim("<tr>\n");
522 break;
523 case MD_BLOCK_TH:
524 data->render_open_td_block(
525 "th", static_cast<MD_BLOCK_TD_DETAIL*>(detail)
526 );
527 break;
528 case MD_BLOCK_TD:
529 data->render_open_td_block(
530 "td", static_cast<MD_BLOCK_TD_DETAIL*>(detail)
531 );
532 break;
535 return 0;
538 int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
540 static const MD_CHAR* head[] = {// NOLINT
541 "</h1>\n",
542 "</h2>\n",
543 "</h3>\n",
544 "</h4>\n",
545 "</h5>\n",
546 "</h6>\n"
547 };
548 auto* data = static_cast<class md_html*>(userdata);
550 switch (type) {
551 case MD_BLOCK_DOC: /*noop*/
552 break;
553 case MD_BLOCK_QUOTE:
554 data->render_verbatim("</blockquote>\n");
555 break;
556 case MD_BLOCK_UL:
557 data->render_verbatim("</ul>\n");
558 break;
559 case MD_BLOCK_OL:
560 data->render_verbatim("</ol>\n");
561 break;
562 case MD_BLOCK_LI:
563 data->render_verbatim("</li>\n");
564 break;
565 case MD_BLOCK_HR: /*noop*/
566 break;
567 case MD_BLOCK_H:
568 data->render_verbatim(
569 head[static_cast<MD_BLOCK_H_DETAIL*>(detail)->level - 1] // NOLINT
570 );
571 break;
572 case MD_BLOCK_CODE:
573 data->render_verbatim("</code></pre>\n");
574 break;
575 case MD_BLOCK_HTML: /* noop */
576 break;
577 case MD_BLOCK_P:
578 data->render_verbatim("</p>\n");
579 break;
580 case MD_BLOCK_TABLE:
581 data->render_verbatim("</table>\n");
582 break;
583 case MD_BLOCK_THEAD:
584 data->render_verbatim("</thead>\n");
585 break;
586 case MD_BLOCK_TBODY:
587 data->render_verbatim("</tbody>\n");
588 break;
589 case MD_BLOCK_TR:
590 data->render_verbatim("</tr>\n");
591 break;
592 case MD_BLOCK_TH:
593 data->render_verbatim("</th>\n");
594 break;
595 case MD_BLOCK_TD:
596 data->render_verbatim("</td>\n");
597 break;
600 return 0;
603 int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
605 auto* data = static_cast<class md_html*>(userdata);
606 const bool inside_img = (data->image_nesting_level > 0);
608 if (type == MD_SPAN_IMG) {
609 data->image_nesting_level++;
612 if (inside_img) {
613 return 0;
616 switch (type) {
617 case MD_SPAN_EM:
618 data->render_verbatim("<em>");
619 break;
620 case MD_SPAN_STRONG:
621 data->render_verbatim("<strong>");
622 break;
623 case MD_SPAN_U:
624 data->render_verbatim("<u>");
625 break;
626 case MD_SPAN_A:
627 data->render_open_a_span(static_cast<MD_SPAN_A_DETAIL*>(detail));
628 break;
629 case MD_SPAN_IMG:
630 data->render_open_img_span(static_cast<MD_SPAN_IMG_DETAIL*>(detail));
631 break;
632 case MD_SPAN_CODE:
633 data->render_verbatim("<code>");
634 break;
635 case MD_SPAN_DEL:
636 data->render_verbatim("<del>");
637 break;
638 case MD_SPAN_LATEXMATH:
639 data->render_verbatim("<x-equation>");
640 break;
641 case MD_SPAN_LATEXMATH_DISPLAY:
642 data->render_verbatim("<x-equation type=\"display\">");
643 break;
644 case MD_SPAN_WIKILINK:
645 data->render_open_wikilink_span(
646 static_cast<MD_SPAN_WIKILINK_DETAIL*>(detail)
647 );
648 break;
651 return 0;
654 int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
656 auto* data = static_cast<class md_html*>(userdata);
658 if (type == MD_SPAN_IMG) {
659 data->image_nesting_level--;
662 if (data->image_nesting_level > 0) {
663 return 0;
666 switch (type) {
667 case MD_SPAN_EM:
668 data->render_verbatim("</em>");
669 break;
670 case MD_SPAN_STRONG:
671 data->render_verbatim("</strong>");
672 break;
673 case MD_SPAN_U:
674 data->render_verbatim("</u>");
675 break;
676 case MD_SPAN_A:
677 data->render_verbatim("</a>");
678 break;
679 case MD_SPAN_IMG:
680 data->render_close_img_span(static_cast<MD_SPAN_IMG_DETAIL*>(detail));
681 break;
682 case MD_SPAN_CODE:
683 data->render_verbatim("</code>");
684 break;
685 case MD_SPAN_DEL:
686 data->render_verbatim("</del>");
687 break;
688 case MD_SPAN_LATEXMATH: /*fall through*/
689 case MD_SPAN_LATEXMATH_DISPLAY:
690 data->render_verbatim("</x-equation>");
691 break;
692 case MD_SPAN_WIKILINK:
693 data->render_verbatim("</x-wikilink>");
694 break;
697 return 0;
700 int text_callback(
701 MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata
704 auto* data = static_cast<class md_html*>(userdata);
706 switch (type) {
707 case MD_TEXT_NULLCHAR:
708 data->render_utf8_codepoint(0x0000, &md_html::render_verbatim);
709 break;
710 case MD_TEXT_BR:
711 data->render_verbatim((data->image_nesting_level == 0 ? ("<br>\n") : " ")
712 );
713 break;
714 case MD_TEXT_SOFTBR:
715 data->render_verbatim((data->image_nesting_level == 0 ? "\n" : " "));
716 break;
717 case MD_TEXT_HTML:
718 data->render_verbatim(text, size);
719 break;
720 case MD_TEXT_ENTITY:
721 data->render_entity(text, size, &md_html::render_html_escaped);
722 break;
723 case MD_TEXT_NORMAL:
724 case MD_TEXT_CODE:
725 case MD_TEXT_LATEXMATH:
726 data->render_html_escaped(text, size);
727 break;
730 return 0;
733 namespace startgit
736 int md_html(
737 const MD_CHAR* input,
738 MD_SIZE input_size,
739 void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
740 void* userdata,
741 unsigned parser_flags,
742 unsigned renderer_flags
745 class md_html render = {
746 .process_output = process_output,
747 .userdata = userdata,
748 .flags = renderer_flags,
749 .image_nesting_level = 0
750 };
752 const MD_PARSER parser = {
753 0,
754 parser_flags,
755 enter_block_callback,
756 leave_block_callback,
757 enter_span_callback,
758 leave_span_callback,
759 text_callback,
760 nullptr,
761 nullptr
762 };
764 return md_parse(input, input_size, &parser, &render);
767 } // namespace startgit