tacker

a simple web bundler
git clone https://tongong.net/git/tacker.git
Log | Files | Refs | README

bundle_html.ha (6231B)


      1 use bufio;
      2 use fmt;
      3 use io;
      4 use os;
      5 use searchio;
      6 use strings;
      7 
      8 // https://html.spec.whatwg.org/multipage/syntax.html
      9 
     10 // inputs are borrowed
     11 fn tacker_html(inputpath: str, ofile: io::handle) void = {
     12 	const ifile = os::open(inputpath)!;
     13 	defer io::close(ifile)!;
     14 	const p_main = searchio::compile(["<!--", "<script", "<link",
     15 		"<audio", "<embed", "<img", "<source", "<track", "<video"]);
     16 	const p_comment = searchio::compile(["-->"]);
     17 	const p_tagclose = searchio::compile([">"]);
     18 	const p_scriptclose = searchio::compile(["</script>"]);
     19 	defer searchio::finish(p_main);
     20 	defer searchio::finish(p_comment);
     21 	defer searchio::finish(p_tagclose);
     22 	defer searchio::finish(p_scriptclose);
     23 
     24 	for (true) {
     25 		const m = searchio::search(ifile, ofile, p_main);
     26 		if (m is size) {
     27 			const m = m: size;
     28 
     29 			let tagbuf = bufio::dynamic(io::mode::RDWR);
     30 			defer io::close(&tagbuf)!;
     31 			if (m != 0) {
     32 				fmt::fprint(&tagbuf, p_main.original[m])!;
     33 				searchio::search(ifile, &tagbuf, p_tagclose);
     34 				fmt::fprint(&tagbuf, ">")!;
     35 			};
     36 
     37 			if (m == 0) {
     38 				// html comments
     39 				fmt::fprint(ofile, "<!--")!;
     40 				searchio::search(ifile, ofile, p_comment);
     41 				fmt::fprint(ofile, "-->")!;
     42 			} else if (m == 1) {
     43 				// <script> -> look at src and replace the whole
     44 				// tag
     45 				const src = tag_get_attr(tagbuf.buf,
     46 					strings::toutf8("src"));
     47 				if (src is not_found) {
     48 					io::writeall(ofile, tagbuf.buf)!;
     49 				} else {
     50 					searchio::search(ifile, black_hole,
     51 						p_scriptclose);
     52 					const src = src: tag_split;
     53 					const src = strings::fromutf8(src.1);
     54 					fmt::fprint(ofile, "<script>\n")!;
     55 					const src = resolve_path(src,
     56 						inputpath);
     57 					defer free(src);
     58 					tacker_js(src, ofile, true);
     59 					fmt::fprint(ofile, "</script>")!;
     60 				};
     61 			} else if (m == 2) {
     62 				// stylesheets -> look at rel and href and
     63 				// replace the whole tag
     64 				const rel = tag_get_attr(tagbuf.buf,
     65 					strings::toutf8("rel"));
     66 				let is_style = rel is tag_split;
     67 				const rel = if (is_style) {
     68 					const rel = rel: tag_split;
     69 					const rel = strings::fromutf8(rel.1);
     70 					if (rel != "stylesheet")
     71 						is_style = false;
     72 					yield rel;
     73 				} else "";
     74 				if (!is_style) {
     75 					io::writeall(ofile, tagbuf.buf)!;
     76 				} else {
     77 					const href = tag_get_attr(tagbuf.buf,
     78 						strings::toutf8("href"));
     79 					if (href is not_found)
     80 						fixed_fatalf("{}: broken style tag \"{}\".",
     81 							inputpath,
     82 							strings::fromutf8(
     83 							tagbuf.buf));
     84 					const href = href: tag_split;
     85 					const href = strings::fromutf8(href.1);
     86 					fmt::fprint(ofile, "<style>\n")!;
     87 					const href = resolve_path(href,
     88 						inputpath);
     89 					defer free(href);
     90 					tacker_css(href, ofile);
     91 					fmt::fprint(ofile, "</style>")!;
     92 				};
     93 			} else {
     94 				// media tags -> look at src and replace only
     95 				// src, not the whole tag
     96 				const src = tag_get_attr(tagbuf.buf,
     97 					strings::toutf8("src"));
     98 				if (src is not_found) {
     99 					io::writeall(ofile, tagbuf.buf)!;
    100 				} else {
    101 					const src = src: tag_split;
    102 					const srctext = strings::fromutf8(
    103 						src.1);
    104 					const srctext = resolve_path(srctext,
    105 						inputpath);
    106 					defer free(srctext);
    107 					io::writeall(ofile, src.0)!;
    108 					fmt::fprint(ofile, "data:;base64,")!;
    109 					tacker_binary(srctext, ofile);
    110 					io::writeall(ofile, src.2)!;
    111 				};
    112 			};
    113 		} else break;
    114 	};
    115 };
    116 
    117 
    118 // Search an attribute inside an html tag.
    119 // Return the input buffer split at the borders of the value of the found
    120 // attribute.
    121 type not_found = void;
    122 type tag_split = ([]u8, []u8, []u8);
    123 type quotes = enum {
    124 	NORMAL,
    125 	SINGLE,
    126 	DOUBLE
    127 };
    128 fn tag_get_attr(tag: []u8, attr: []u8) (tag_split | not_found) = {
    129 	// STRATEGY
    130 	// go through the string and find all '=' that are not inside quotes
    131 	// find the corresponding attribute name and if it is fitting find the
    132 	// corresponding value
    133 	let state = quotes::NORMAL;
    134 	for (let i = 0z; i < len(tag); i += 1) {
    135 		switch (state) {
    136 		case quotes::NORMAL => {
    137 			if (tag[i] == '"') state = quotes::DOUBLE;
    138 			if (tag[i] == '\'') state = quotes::SINGLE;
    139 			if (tag[i] == '=') {
    140 				const pos_equal = i;
    141 				let pos_endattr = pos_equal - 1;
    142 				for (isspace(tag[pos_endattr]))
    143 					pos_endattr -= 1;
    144 				pos_endattr += 1;
    145 				let pos_startattr = pos_endattr - 1;
    146 				for (!isspace(tag[pos_startattr]))
    147 					pos_startattr -= 1;
    148 				pos_startattr += 1;
    149 				const amatch = tag[pos_startattr..pos_endattr];
    150 				if (cmpcase(attr, amatch)) {
    151 					let pos_startval = pos_equal + 1;
    152 					for (isspace(tag[pos_startval]))
    153 						pos_startval += 1;
    154 					let pos_endval = pos_startval;
    155 					if (tag[pos_startval] == '\'') {
    156 						pos_startval += 1;
    157 						pos_endval += 1;
    158 						for (tag[pos_endval] != '\'')
    159 							pos_endval += 1;
    160 					} else if (tag[pos_startval] == '"') {
    161 						pos_startval += 1;
    162 						pos_endval += 1;
    163 						for (tag[pos_endval] != '"')
    164 							pos_endval += 1;
    165 					} else {
    166 						for (!isspace(tag[pos_endval]))
    167 							pos_endval += 1;
    168 					};
    169 					return (tag[..pos_startval],
    170 						tag[pos_startval..pos_endval],
    171 						tag[pos_endval..]);
    172 				};
    173 			};
    174 		};
    175 		case quotes::SINGLE => {
    176 			if (tag[i] == '\'') state = quotes::NORMAL;
    177 		};
    178 		case quotes::DOUBLE => {
    179 			if (tag[i] == '"') state = quotes::NORMAL;
    180 		};
    181 		};
    182 	};
    183 	return not_found;
    184 };
    185 
    186 
    187 @test fn tag_get_attr() void = {
    188 	const t = strings::toutf8("<script attr1 attr2 = test attr3= 'asdasdf = \"asdfasdf' sRc=\"filename\">");
    189 	const a = strings::toutf8("src");
    190 	const res = tag_get_attr(t, a);
    191 	assert(res is tag_split);
    192 	const res = res: tag_split;
    193 	assert(strings::fromutf8(res.0) == "<script attr1 attr2 = test attr3= 'asdasdf = \"asdfasdf' sRc=\"");
    194 	assert(strings::fromutf8(res.1) == "filename");
    195 	assert(strings::fromutf8(res.2) == "\">");
    196 };
    197 
    198 
    199 // There are similar functions in the ascii module but there is no utf8 in
    200 // html attributes so this is sufficient.
    201 fn isspace(c: u8) bool =
    202 	c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
    203 
    204 
    205 fn tolower(c: u8) u8 = {
    206 	if ('A' <= c && c <= 'Z') return c - 'A' + 'a';
    207 	return c;
    208 };
    209 
    210 
    211 // Return true if the two buffers are case-insensitively equal
    212 fn cmpcase(a: []u8, b: []u8) bool = {
    213 	if (len(a) != len(b)) return false;
    214 	for (let i = 0z; i < len(a); i += 1) {
    215 		if (tolower(a[i]) != tolower(b[i])) return false;
    216 	};
    217 	return true;
    218 };