bundle_html.ha (6231B)
use bufio;
use fmt;
use io;
use os;
use searchio;
use strings;

// https://html.spec.whatwg.org/multipage/syntax.html

// Copies the HTML file at inputpath to ofile and inlines external resources
// on the way:
//
// - HTML comments are passed through unchanged.
// - <script src=...> ... </script> is replaced by an inline <script> element
//   whose content is written by tacker_js.
// - <link rel=stylesheet href=...> is replaced by an inline <style> element
//   whose content is written by tacker_css. A stylesheet link without href
//   is reported through fixed_fatalf.
// - For <audio>, <embed>, <img>, <source>, <track> and <video> only the value
//   of src is replaced, by "data:;base64," followed by whatever tacker_binary
//   writes.
//
// Tags without the relevant attribute are written out as they were read.
// Resource paths are passed through resolve_path together with inputpath.
// I/O errors on the input file or on writes abort the program (error
// assertions).
//
// inputs are borrowed
fn tacker_html(inputpath: str, ofile: io::handle) void = {
	const ifile = os::open(inputpath)!;
	defer io::close(ifile)!;

	// The order of p_main matters: the index of the matched pattern
	// selects the handling below.
	const p_main = searchio::compile(["<!--", "<script", "<link",
		"<audio", "<embed", "<img", "<source", "<track", "<video"]);
	const p_comment = searchio::compile(["-->"]);
	const p_tagclose = searchio::compile([">"]);
	const p_scriptclose = searchio::compile(["</script>"]);
	defer searchio::finish(p_main);
	defer searchio::finish(p_comment);
	defer searchio::finish(p_tagclose);
	defer searchio::finish(p_scriptclose);

	for (true) {
		// NOTE(review): presumably searchio::search forwards
		// everything in front of the match to its second argument and
		// consumes the match itself; that is why the matched text is
		// re-emitted by hand below. Confirm against searchio.
		const hit = searchio::search(ifile, ofile, p_main);
		if (!(hit is size)) break;
		// 0 is "<!--", 1 is "<script", 2 is "<link", anything above
		// is one of the media tags.
		const idx = hit as size;

		// Collect the complete opening tag so its attributes can be
		// inspected. Comments have no attributes and skip this step.
		let tagbuf = bufio::dynamic(io::mode::RDWR);
		defer io::close(&tagbuf)!;
		if (idx != 0) {
			fmt::fprint(&tagbuf, p_main.original[idx])!;
			searchio::search(ifile, &tagbuf, p_tagclose);
			fmt::fprint(&tagbuf, ">")!;
		};

		if (idx == 0) {
			// html comments: copy verbatim
			fmt::fprint(ofile, "<!--")!;
			searchio::search(ifile, ofile, p_comment);
			fmt::fprint(ofile, "-->")!;
		} else if (idx == 1) {
			// <script> -> look at src and replace the whole tag
			const found = tag_get_attr(tagbuf.buf,
				strings::toutf8("src"));
			if (found is not_found) {
				io::writeall(ofile, tagbuf.buf)!;
			} else {
				// Drop the original element body up to
				// "</script>".
				searchio::search(ifile, black_hole,
					p_scriptclose);
				const split = found as tag_split;
				const src = strings::fromutf8(split.1);
				fmt::fprint(ofile, "<script>\n")!;
				// resolve_path hands back a string that is
				// freed here.
				const path = resolve_path(src, inputpath);
				defer free(path);
				tacker_js(path, ofile, true);
				fmt::fprint(ofile, "</script>")!;
			};
		} else if (idx == 2) {
			// stylesheets -> look at rel and href and replace the
			// whole tag
			const rel = tag_get_attr(tagbuf.buf,
				strings::toutf8("rel"));
			// Link type keywords are ASCII case-insensitive, so
			// rel="Stylesheet" has to be accepted as well.
			const is_style = rel is tag_split
				&& cmpcase((rel as tag_split).1,
					strings::toutf8("stylesheet"));
			if (!is_style) {
				io::writeall(ofile, tagbuf.buf)!;
			} else {
				const found = tag_get_attr(tagbuf.buf,
					strings::toutf8("href"));
				if (found is not_found)
					fixed_fatalf("{}: broken style tag \"{}\".",
						inputpath,
						strings::fromutf8(
							tagbuf.buf));
				const split = found as tag_split;
				const href = strings::fromutf8(split.1);
				fmt::fprint(ofile, "<style>\n")!;
				const path = resolve_path(href, inputpath);
				defer free(path);
				tacker_css(path, ofile);
				fmt::fprint(ofile, "</style>")!;
			};
		} else {
			// media tags -> look at src and replace only src, not
			// the whole tag
			const found = tag_get_attr(tagbuf.buf,
				strings::toutf8("src"));
			if (found is not_found) {
				io::writeall(ofile, tagbuf.buf)!;
			} else {
				const split = found as tag_split;
				const src = strings::fromutf8(split.1);
				const path = resolve_path(src, inputpath);
				defer free(path);
				// tag text before the value, the inlined
				// data, tag text after the value
				io::writeall(ofile, split.0)!;
				fmt::fprint(ofile, "data:;base64,")!;
				tacker_binary(path, ofile);
				io::writeall(ofile, split.2)!;
			};
		};
	};
};


// Search an attribute inside an html tag.
// Return the input buffer split at the borders of the value of the found
// attribute.
// Returned by tag_get_attr when the tag carries no usable attribute of the
// requested name.
type not_found = void;

// A tag split around an attribute value: (text before the value, the value,
// text after the value). Quotes around the value end up in the first and the
// last part, so the three parts concatenated give the original tag again. All
// parts are slices of the buffer passed to tag_get_attr.
type tag_split = ([]u8, []u8, []u8);

// Quoting state of the scanner in tag_get_attr.
type quotes = enum {
	NORMAL,
	SINGLE,
	DOUBLE,
};

// Looks for the attribute named attr (ASCII case-insensitive) in tag and
// splits tag at the borders of its value.
//
// STRATEGY
// Go through the buffer and find every '=' that is not inside quotes. The
// word in front of it is the attribute name; if it is the requested one, the
// value behind the '=' is located and returned.
//
// Returns not_found if no such attribute exists, or if its value is cut off
// (nothing follows the '=' or a quoted value is never closed).
fn tag_get_attr(tag: []u8, attr: []u8) (tag_split | not_found) = {
	let state = quotes::NORMAL;
	for (let i = 0z; i < len(tag); i += 1) {
		const c = tag[i];
		switch (state) {
		case quotes::SINGLE =>
			if (c == '\'') state = quotes::NORMAL;
		case quotes::DOUBLE =>
			if (c == '"') state = quotes::NORMAL;
		case quotes::NORMAL =>
			if (c == '"') {
				state = quotes::DOUBLE;
			} else if (c == '\'') {
				state = quotes::SINGLE;
			} else if (c == '=') {
				// Skip blanks between the name and '=' ...
				let name_end = i;
				for (name_end > 0 && isspace(tag[name_end - 1]))
					name_end -= 1;
				// ... then walk back to the blank in front of
				// the name. Both scans stop at the start of
				// the buffer instead of wrapping around.
				let name_start = name_end;
				for (name_start > 0
						&& !isspace(tag[name_start - 1]))
					name_start -= 1;
				if (cmpcase(attr, tag[name_start..name_end]))
					return attr_value_split(tag, i + 1);
			};
		};
	};
	return not_found;
};

// Splits tag around the attribute value that follows position start (the
// index right behind the '=' of the attribute). Leading white space is
// skipped. A quoted value ends at the matching quote, an unquoted value ends
// at white space, at '>' or at the end of the buffer.
fn attr_value_split(tag: []u8, start: size) (tag_split | not_found) = {
	let val_start = start;
	for (val_start < len(tag) && isspace(tag[val_start]))
		val_start += 1;
	if (val_start == len(tag)) return not_found;

	let val_end = val_start;
	if (tag[val_start] == '\'' || tag[val_start] == '"') {
		const quote = tag[val_start];
		val_start += 1;
		val_end = val_start;
		for (val_end < len(tag) && tag[val_end] != quote)
			val_end += 1;
		// unterminated quote: there is no well-defined value
		if (val_end == len(tag)) return not_found;
	} else {
		for (val_end < len(tag) && !isspace(tag[val_end])
				&& tag[val_end] != '>')
			val_end += 1;
	};
	return (tag[..val_start], tag[val_start..val_end], tag[val_end..]);
};


@test fn tag_get_attr() void = {
	const t = strings::toutf8("<script attr1 attr2 = test attr3= 'asdasdf = \"asdfasdf' sRc=\"filename\">");
	const a = strings::toutf8("src");
	const res = tag_get_attr(t, a);
	assert(res is tag_split);
	const res = res as tag_split;
	assert(strings::fromutf8(res.0) == "<script attr1 attr2 = test attr3= 'asdasdf = \"asdfasdf' sRc=\"");
	assert(strings::fromutf8(res.1) == "filename");
	assert(strings::fromutf8(res.2) == "\">");

	// unquoted value directly in front of the closing '>'
	const res = tag_get_attr(strings::toutf8("<img src=a.png>"), a);
	assert(res is tag_split);
	const res = res as tag_split;
	assert(strings::fromutf8(res.0) == "<img src=");
	assert(strings::fromutf8(res.1) == "a.png");
	assert(strings::fromutf8(res.2) == ">");

	// unquoted value followed by another attribute
	const res = tag_get_attr(strings::toutf8("<img src=a.png alt=x>"), a);
	assert(res is tag_split);
	const res = res as tag_split;
	assert(strings::fromutf8(res.1) == "a.png");
	assert(strings::fromutf8(res.2) == " alt=x>");

	// malformed input must not run out of the buffer
	assert(tag_get_attr(strings::toutf8("<img src=\"a.png"), a)
		is not_found);
	assert(tag_get_attr(strings::toutf8("<img src="), a) is not_found);
	assert(tag_get_attr(strings::toutf8("=x"), a) is not_found);
	assert(tag_get_attr(strings::toutf8("<img alt='src=x'>"), a)
		is not_found);
};


// White space test on a single byte (space, tab, line feed, form feed,
// carriage return). There are similar functions in the ascii module but there
// is no utf8 in html attributes so this is sufficient.
fn isspace(c: u8) bool =
	c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';


// Maps the ASCII letters 'A' to 'Z' to lower case; every other byte is
// returned unchanged.
fn tolower(c: u8) u8 = {
	if ('A' <= c && c <= 'Z') return c - 'A' + 'a';
	return c;
};


// Return true if the two buffers are equal when ASCII letter case is ignored.
// Buffers of different length are never equal.
fn cmpcase(a: []u8, b: []u8) bool = {
	if (len(a) != len(b)) return false;
	for (let i = 0z; i < len(a); i += 1) {
		if (tolower(a[i]) != tolower(b[i])) return false;
	};
	return true;
};