bundle_js.ha (10990B)
use bufio;
use bytes;
use fmt;
use io;
use os;
use searchio;
use strings;

// STRATEGY
// Two passes are needed:
// - First pass: Scan the source files and create a dependency graph
// - Second pass: Bundle all needed files

// One JavaScript source file; a node of the dependency graph.
type jsfile = struct {
	path: str,
	dependencies: []size, // indices into dep_graph
	scanned: bool, // if the file was scanned for its dependencies already
};

// All known source files. Edges are stored as indices in
// [[jsfile]].dependencies.
type dep_graph = []jsfile;

// Frees the graph including the path and dependency list of every node.
fn dep_graph_free(g: dep_graph) void = {
	for (let i = 0z; i < len(g); i += 1) {
		free(g[i].path);
		free(g[i].dependencies);
	};
	free(g);
};

// Bundles inputpath and everything it require()s into ofile. Every file
// becomes an async function expression whose result is bound to _tacker<N>,
// where N is the index of the file in the dependency graph. Files are emitted
// in the order computed by sort_kahn.
//
// html: true if the output can be inlined in a html script tag. This is
// important because code like e.g.
//     let tag = "</script>";
// has to be escaped.
// inputs are borrowed
fn tacker_js(inputpath: str, ofile: io::handle, html: bool) void = {
	// The guard keeps state between writes, so it has to be mutable.
	let s = script_guard(ofile);
	if (html) ofile = &s;
	let g: dep_graph = [];
	defer dep_graph_free(g);
	dep_add(void, inputpath, &g);
	const sorting = sort_kahn(g, inputpath);
	defer free(sorting);
	fmt::fprintln(ofile, "(async function() {")!;
	for (let i = 0z; i < len(sorting); i += 1) {
		fmt::fprintfln(ofile, "const _tacker{} = await (async function() {{", sorting[i])!;
		fmt::fprintln(ofile, "const module = { exports: {} }, exports = module.exports;")!;
		emit_bundled(g[sorting[i]].path, ofile, g);
		fmt::fprintln(ofile, "\nreturn module.exports;")!;
		fmt::fprintln(ofile, "})();")!;
	};
	fmt::fprintln(ofile, "})();")!;
	// Closing the guard flushes a possibly buffered partial "</script".
	if (html) io::close(ofile)!;
};


// Search patterns; compiled in init() and released in fini().
let p_req: searchio::pattern = searchio::pattern {...};
let p_newline: searchio::pattern = searchio::pattern {...};
let p_commentend: searchio::pattern = searchio::pattern {...};
let p_quotedouble: searchio::pattern = searchio::pattern {...};
let p_quotesingle: searchio::pattern = searchio::pattern {...};

@init fn init() void = {
	// "/" has to be recognized as regex literal or comment start
	// The order matters: dep_scan() and emit_bundled() dispatch on the
	// index of the matched needle (0 = "require(", 1 = "/", else quote).
	p_req = searchio::compile(["require(", "/", "\"", "'", "`"]);
	p_newline = searchio::compile(["\n"]);
	p_commentend = searchio::compile(["*/"]);
	p_quotedouble = searchio::compile(["\""]);
	p_quotesingle = searchio::compile(["'"]);
};

@fini fn fini() void = {
	searchio::finish(p_quotesingle);
	searchio::finish(p_quotedouble);
	searchio::finish(p_commentend);
	searchio::finish(p_newline);
	searchio::finish(p_req);
};


// Add a connection frompath -> deppath to the dependency graph. deppath is
// added as a new node if it is not known yet and is scanned for its own
// dependencies the first time it is seen. Pass void as frompath to add a node
// without an incoming edge (the entry file).
// inputs are borrowed
fn dep_add(frompath: (str | void), deppath: str, graph: *dep_graph) void = {
	let g = *graph;
	let depindex = 0z;
	for (depindex < len(g) && g[depindex].path != deppath) depindex += 1;
	if (depindex == len(g)) {
		append(g, jsfile {
			path = strings::dup(deppath),
			dependencies = [],
			scanned = false
		});
		// append may have moved the slice; publish it right away
		*graph = g;
	};
	// add link to the graph
	if (frompath is str) {
		const frompath = frompath: str;
		let fromindex = 0z;
		for (fromindex < len(g) && g[fromindex].path != frompath)
			fromindex += 1;
		append(g[fromindex].dependencies, depindex);
	};
	// scan deppath if necessary
	if (g[depindex].scanned == false) {
		// mark before recursing so that cycles terminate
		g[depindex].scanned = true;
		*graph = g;
		// g is stale after this call and must not be used anymore
		dep_scan(deppath, graph);
	};
};


// Recursively scan and add a file to the dependency graph
// Scanning stops at the first string literal or at a "/" that starts neither
// a line comment nor a block comment.
// inputs are borrowed
fn dep_scan(inputpath: str, graph: *dep_graph) void = {
	const ifile = os::open(inputpath)!;
	defer io::close(ifile)!;
	// Read until require or comment or quote
	for (true) {
		const m = searchio::search(ifile, black_hole, p_req);
		if (m is size) {
			// index of the needle of p_req that matched
			const m = m: size;
			if (m == 0) {
				const p = read_require(ifile, inputpath);
				if (p is str) {
					const p = p: str;
					defer free(p);
					const p = resolve_path_require(p,
						inputpath);
					defer free(p);
					dep_add(inputpath, p, graph);
				};
			} else if (m == 1) {
				// "/*", "//" or "/regex/"
				const buf: [1]u8 = [' '];
				if (io::read(ifile, buf) is io::EOF) break;
				if (buf[0] == '/') {
					searchio::search(ifile, black_hole,
						p_newline);
				} else if (buf[0] == '*') {
					searchio::search(ifile, black_hole,
						p_commentend);
				} else break;
			} else break; // '"', "'" or "`"
		} else break;
	};
};


// Parse the contents of a require() macro and return the file path.
// Expects the stream to be positioned right behind "require(". Only a single
// quoted string literal directly followed by ")" is accepted, everything else
// is reported as a fatal error.
// Return value has to be freed.
// Return void if require() is part of a longer identifier
fn read_require(in: io::handle, path: str) (str | void) = {
	const buf: [1]u8 = [' '];
	// Check if require() is part of another identifier like my_require()
	// Seek to the byte in front of "require(" (8 bytes + 1); if that
	// fails (e.g. at the very start of the file) the check is skipped.
	if (!(io::seek(in, -9, io::whence::CUR) is io::error)) {
		io::read(in, buf)!;
		io::seek(in, 8, io::whence::CUR)!;
		// this weird string contains all characters that are allowed in
		// a js source file but not in an identifier
		if (!strings::contains("\t\n\r !%&()*+,-./:;<=>?[]^{|}~",
				buf[0]: u32: rune))
			return void;
	};

	io::read(in, buf)!;
	if (buf[0] == '"' || buf[0] == '\'') {
		let namebuf = bufio::dynamic(io::mode::WRITE);
		const pattern = if (buf[0] == '\'') p_quotesingle
			else p_quotedouble;
		// collect everything up to the closing quote in namebuf
		searchio::search(in, &namebuf, pattern);
		let ret = strings::fromutf8(namebuf.buf);
		io::read(in, buf)!;
		if (buf[0] == ')') return ret;
	};
	fixed_fatalf("{}: broken require() call.", path);
	return ""; // will not be reached
};


// Kahn's algorithm https://en.wikipedia.org/wiki/Topological_sorting
// Returns the indices of all files in an order where every file comes after
// the files it depends on. The graph is consumed: the "scanned" flags are
// reused as "already sorted" markers and the dependency edges are overwritten.
// Return value has to be freed
fn sort_kahn(graph: dep_graph, entrypath: str) []size = {
	let sorting: []size = [];
	for (let i = 0z; i < len(graph); i += 1) {
		graph[i].scanned = false;
	};
	for (true) {
		// Find file without dependencies
		let f = 0z; // index into graph
		for (f < len(graph); f += 1) {
			const deps = graph[f].dependencies;
			let no_deps = true;
			for (let i = 0z; i < len(deps); i += 1) {
				// Deleted edges in graph will be set to -1
				if (deps[i] != -1) {
					no_deps = false;
					break;
				};
			};
			if (no_deps && !graph[f].scanned) break;
		};
		if (f == len(graph)) {
			// If no file without dependencies can be found -> error
			fixed_fatalf("{}: circular javascript dependencies.",
				entrypath);
		};
		// Delete dependencies for dependent files
		for (let i = 0z; i < len(graph); i += 1) {
			let deps = graph[i].dependencies;
			for (let j = 0z; j < len(deps); j += 1) {
				if (deps[j] == f) deps[j] = -1;
			};
		};
		graph[f].scanned = true;
		append(sorting, f);
		// If all files are sorted -> stop
		if (len(sorting) == len(graph)) return sorting;
	};
	return []; // will not be reached
};


// Resolve require() and add files to the bundle
// Copies inputpath to ofile and replaces every recognized require("...") call
// by the _tacker<N> constant of the required file. Stops resolving at the
// same places where dep_scan() stops scanning; the rest of the file is piped
// through search_require().
// very similar to dep_scan()
fn emit_bundled(inputpath: str, ofile: io::handle, graph: dep_graph) void = {
	const ifile = os::open(inputpath)!;
	defer io::close(ifile)!;

	// Read until require or comment or quote
	for (true) {
		const m = searchio::search(ifile, ofile, p_req);
		if (m is size) {
			const m = m: size;
			if (m == 0) {
				const p = read_require(ifile,
					inputpath);
				if (p is str) {
					const p = p: str;
					defer free(p);
					const p = resolve_path_require(
						p, inputpath);
					defer free(p);
					let i = 0z;
					// The file can only be missing if the
					// sources changed between the two
					// passes (race condition).
					for (i < len(graph)
							&& graph[i].path != p)
						i += 1;
					if (i == len(graph)) {
						fixed_fatalf("{}: required file is missing from the dependency graph.",
							inputpath);
					};
					fmt::fprintf(ofile, "_tacker{}", i)!;
				} else fmt::fprint(ofile, "require(")!;
			} else if (m == 1) {
				// "/*", "//" or "/regex/"
				fmt::fprint(ofile, "/")!;
				const buf: [1]u8 = [' '];
				if (io::read(ifile, buf) is io::EOF) break;
				io::writeall(ofile, buf)!;
				if (buf[0] == '/') {
					searchio::search(ifile, ofile,
						p_newline);
					fmt::fprint(ofile, "\n")!;
				} else if (buf[0] == '*') {
					searchio::search(ifile, ofile,
						p_commentend);
					fmt::fprint(ofile, "*/")!;
				} else break;
			} else {
				// '"', "'" or "`"
				fmt::fprint(ofile, p_req.original[m])!;
				break;
			};
		} else break;
	};
	search_require(ifile, ofile, inputpath);
};

// Find potentially lost require() calls while piping everything from in to out.
// A warning is printed for the first "require(" that is found, also if it is
// split across two reads. Nothing is replaced here.
fn search_require(in: io::handle, out: io::handle, inputpath: str) void = {
	const re = strings::toutf8("require("); // 7 = len(re) - 1
	// The first 7 bytes hold the tail of the previous read so that a match
	// spanning two reads is found as well.
	static let buf_ext: [4096 + 7]u8 = [0...];
	// The buffer is static: drop the tail left behind by a previous file.
	for (let i = 0z; i < 7; i += 1) buf_ext[i] = 0;
	let buf = buf_ext[7..];
	for (true) {
		match (io::read(in, buf)!) {
		case let n: size => {
			io::writeall(out, buf[..n])!;
			// search the carried-over tail + the new data
			if (bytes::contains(buf_ext[..n + 7], re)) {
				warningf("{}: file could contain skipped require() calls.",
					inputpath);
				break;
			};
			// keep the last 7 bytes for the next round
			buf_ext[..7] = buf_ext[n..n+7];
		};
		case io::EOF => break;
		};
	};
	// One warning per file is enough; copy the rest unchecked.
	io::copy(out, in)!;
};

// State of a script guard stream, see script_guard().
type sgstream = struct {
	stream: io::stream,
	sink: io::handle,
	buffered: size, // number of buffered bytes in script
};

const sg_vtable: io::vtable = io::vtable {
	writer = &sg_write,
	closer = &sg_close,
	...
};

// Create a writeonly handle that replaces </script with <\/script
// The handle has to be closed to flush a partial match at the end of the
// output; closing it does not close the sink.
fn script_guard(sink: io::handle) sgstream = {
	return sgstream {
		stream = &sg_vtable,
		sink = sink,
		buffered = 0,
	};
};

// The byte sequence that must not reach the sink unescaped.
// NOTE(review): only this exact lowercase spelling is matched; other
// spellings such as "</SCRIPT" pass through unchanged - confirm whether that
// is acceptable for the callers.
const scriptbuf: [8]u8 = ['<', '/', 's', 'c', 'r', 'i', 'p', 't'];

// Writer of the script guard. Forwards buf_orig to the sink and replaces every
// "</script" by "<\/script", also if the sequence is split across several
// writes. st.buffered is the number of leading bytes of scriptbuf that were
// already seen but not forwarded yet. Always consumes the whole buffer.
fn sg_write(st: *io::stream, buf_orig: const []u8) (size | io::error) = {
	const st = st: *sgstream;
	// '<' only occurs at the start of scriptbuf. After a mismatch the only
	// possible new match therefore starts at the current byte itself.
	let start = 0z; // start of the plain bytes which are not forwarded yet
	for (let i = 0z; i < len(buf_orig); i += 1) {
		const b = buf_orig[i];
		if (b == scriptbuf[st.buffered]) {
			if (st.buffered == 0) {
				// a candidate begins: flush the plain bytes
				io::writeall(st.sink, buf_orig[start..i])?;
			};
			st.buffered += 1;
			if (st.buffered == len(scriptbuf)) {
				io::writeall(st.sink,
					strings::toutf8("<\\/script"))?;
				st.buffered = 0;
			};
			start = i + 1;
			continue;
		};
		if (st.buffered != 0) {
			// The candidate turned out to be harmless.
			io::writeall(st.sink, scriptbuf[..st.buffered])?;
			st.buffered = 0;
			if (b == scriptbuf[0]) {
				st.buffered = 1;
				start = i + 1;
			} else {
				start = i;
			};
		};
	};
	io::writeall(st.sink, buf_orig[start..])?;
	return len(buf_orig);
};

// Securely write like sg_write without checking matches across multiple writes
// Not used by sg_write itself; kept as a stateless helper.
fn sg_write_single(sink: io::handle, buf: []u8) void = {
	if (bytes::contains(buf, scriptbuf)) {
		let i = 0z; // number of bytes written
		for (i <= len(buf) - len(scriptbuf)) {
			if (bytes::equal(buf[i..i+len(scriptbuf)], scriptbuf)) {
				io::writeall(sink,
					strings::toutf8("<\\/script"))!;
				i += len(scriptbuf);
			} else {
				io::writeall(sink, buf[i..i+1])!;
				i += 1;
			};
		};
		io::writeall(sink, buf[i..])!;
	} else io::writeall(sink, buf)!;
};

// Closer of the script guard. Flushes a buffered partial "</script" to the
// sink. The sink itself stays open.
fn sg_close(st: *io::stream) (void | io::error) = {
	const st = st: *sgstream;
	if (st.buffered != 0) {
		io::writeall(st.sink, scriptbuf[..st.buffered])?;
		st.buffered = 0;
	};
};