tacker

a simple web bundler
git clone https://tongong.net/git/tacker.git
Log | Files | Refs | README

bundle_js.ha (10990B)


      1 use bufio;
      2 use bytes;
      3 use fmt;
      4 use io;
      5 use os;
      6 use searchio;
      7 use strings;
      8 
      9 // STRATEGY
     10 // Two passes are needed:
     11 // - First pass: Scan the source files and create a dependency graph
     12 // - Second pass: Bundle all needed files
     13 
// One javascript source file, stored as a node of the dependency graph.
// path and dependencies are owned by the graph and released by
// dep_graph_free().
type jsfile = struct {
	path: str,
	dependencies: []size, // indices into dep_graph
	// if the file was scanned for its dependencies already; sort_kahn()
	// resets this flag and reuses it to mark files that are already sorted
	scanned: bool,
};

// The dependency graph: one entry per file. Edges are the indices stored in
// jsfile.dependencies.
type dep_graph = []jsfile;
     21 
     22 fn dep_graph_free(g: dep_graph) void = {
     23 	for (let i = 0z; i < len(g); i += 1) {
     24 		free(g[i].path);
     25 		free(g[i].dependencies);
     26 	};
     27 	free(g);
     28 };
     29 
     30 // html: true if the output can be inlined in a html script tag. This is
     31 //       important because code like e.g.
     32 //           let tag = "</script>";
     33 //       has to be escaped.
     34 // inputs are borrowed
     35 fn tacker_js(inputpath: str, ofile: io::handle, html: bool) void = {
     36 	const s = script_guard(ofile);
     37 	if (html) ofile = &s;
     38 	let g: dep_graph = [];
     39 	defer dep_graph_free(g);
     40 	dep_add(void, inputpath, &g);
     41 	const sorting = sort_kahn(g, inputpath);
     42 	defer free(sorting);
     43 	fmt::fprintln(ofile, "(async function() {")!;
     44 	for (let i = 0z; i < len(sorting); i += 1) {
     45 		fmt::fprintfln(ofile, "const _tacker{} = await (async function() {{", sorting[i])!;
     46 		fmt::fprintln(ofile, "const module = { exports: {} }, exports = module.exports;")!;
     47 		emit_bundled(g[sorting[i]].path, ofile, g);
     48 		fmt::fprintln(ofile, "\nreturn module.exports;")!;
     49 		fmt::fprintln(ofile, "})();")!;
     50 	};
     51 	fmt::fprintln(ofile, "})();")!;
     52 	if (html) io::close(ofile)!;
     53 };
     54 
     55 
// Search patterns used while scanning javascript sources. The values below
// are placeholders; init() compiles the real patterns and fini() releases
// them.
// p_req: "require(", "/" and the three quote characters
let p_req: searchio::pattern = searchio::pattern {...};
// p_newline: end of a "//" comment
let p_newline: searchio::pattern = searchio::pattern {...};
// p_commentend: end of a "/*" comment
let p_commentend: searchio::pattern = searchio::pattern {...};
// p_quotedouble / p_quotesingle: closing quote of a require() argument
let p_quotedouble: searchio::pattern = searchio::pattern {...};
let p_quotesingle: searchio::pattern = searchio::pattern {...};
     61 
     62 @init fn init() void = {
     63 	// "/" has to be recognized as regex literal or comment start
     64 	p_req = searchio::compile(["require(", "/", "\"", "'", "`"]);
     65 	p_newline = searchio::compile(["\n"]);
     66 	p_commentend = searchio::compile(["*/"]);
     67 	p_quotedouble = searchio::compile(["\""]);
     68 	p_quotesingle = searchio::compile(["'"]);
     69 };
     70 
     71 @fini fn fini() void = {
     72 	defer searchio::finish(p_req);
     73 	defer searchio::finish(p_newline);
     74 	defer searchio::finish(p_commentend);
     75 	defer searchio::finish(p_quotedouble);
     76 	defer searchio::finish(p_quotesingle);
     77 };
     78 
     79 
     80 // Add a connection frompath -> deppath to the dependency graph
     81 // inputs are borrowed
     82 fn dep_add(frompath: (str | void), deppath: str, graph: *dep_graph) void = {
     83 	const g = *graph;
     84 	let depindex = 0z;
     85 	for (depindex < len(g) && g[depindex].path != deppath) depindex += 1;
     86 	if (depindex == len(g)) {
     87 		append(g, jsfile {
     88 			path = strings::dup(deppath),
     89 			dependencies = [],
     90 			scanned = false
     91 		});
     92 	};
     93 	// add link to the graph
     94 	if (frompath is str) {
     95 		const frompath = frompath: str;
     96 		let fromindex = 0z;
     97 		for (fromindex < len(g) && g[fromindex].path != frompath)
     98 			fromindex += 1;
     99 		append(g[fromindex].dependencies, depindex);
    100 	};
    101 	// scan deppath if neccessarry
    102 	if (g[depindex].scanned == false) {
    103 		g[depindex].scanned = true;
    104 		*graph = g;
    105 		dep_scan(deppath, graph);
    106 	};
    107 };
    108 
    109 
    110 // Recursively scan and add a file to the dependency graph
    111 // inputs are borrowed
    112 fn dep_scan(inputpath: str, graph: *dep_graph) void = {
    113 	const ifile = os::open(inputpath)!;
    114 	defer io::close(ifile)!;
    115 	// Read until require or comment or quote
    116 	for (true) {
    117 		const m = searchio::search(ifile, black_hole, p_req);
    118 		if (m is size) {
    119 			if (m == 0) {
    120 				const p = read_require(ifile, inputpath);
    121 				if (p is str) {
    122 					const p = p: str;
    123 					defer free(p);
    124 					const p = resolve_path_require(p,
    125 						inputpath);
    126 					defer free(p);
    127 					dep_add(inputpath, p, graph);
    128 				};
    129 			} else if (m == 1) {
    130 				// "/*", "//" or "/regex/"
    131 				const buf: [1]u8 = [' '];
    132 				if (io::read(ifile, buf) is io::EOF) break;
    133 				if (buf[0] == '/') {
    134 					searchio::search(ifile, black_hole,
    135 						p_newline);
    136 				} else if (buf[0] == '*') {
    137 					searchio::search(ifile, black_hole,
    138 						p_commentend);
    139 				} else break;
    140 			} else break; // '"', "'" or "`"
    141 		} else break;
    142 	};
    143 };
    144 
    145 
    146 // Parse the contents of a require() macro and return the file path.
    147 // Return value has to be freed.
    148 // Return void if require() is part of a longer identifier
    149 fn read_require(in: io::handle, path: str) (str | void) = {
    150 	const buf: [1]u8 = [' '];
    151 	// Check if require() is part of another identifier like my_require()
    152 	if (!(io::seek(in, -9, io::whence::CUR) is io::error)) {
    153 		io::read(in, buf)!;
    154 		io::seek(in, 8, io::whence::CUR)!;
    155 		// this weird string contains all characters that are allowed in
    156 		// a js source file but not in an identifier
    157 		if (!strings::contains("\t\n\r !%&()*+,-./:;<=>?[]^{|}~",
    158 				buf[0]: u32: rune))
    159 			return void;
    160 	};
    161 
    162 	io::read(in, buf)!;
    163 	let broken = false;
    164 	if (buf[0] == '"' || buf[0] == '\'') {
    165 		let namebuf = bufio::dynamic(io::mode::WRITE);
    166 		const pattern = if (buf[0] == '\'') p_quotesingle
    167 			else p_quotedouble;
    168 		searchio::search(in, &namebuf, pattern);
    169 		let ret = strings::fromutf8(namebuf.buf);
    170 		io::read(in, buf)!;
    171 		if (buf[0] == ')') return ret;
    172 	};
    173 	fixed_fatalf("{}: broken require() call.", path);
    174 	return ""; // will not be reached
    175 };
    176 
    177 
    178 // Kahn's algorithm https://en.wikipedia.org/wiki/Topological_sorting
    179 // Return value has to be freed
    180 fn sort_kahn(graph: dep_graph, entrypath: str) []size = {
    181 	let sorting: []size = [];
    182 	for (let i = 0z; i < len(graph); i += 1) {
    183 		graph[i].scanned = false;
    184 	};
    185 	for (true) {
    186 		// Find file without dependencies
    187 		let f = 0z; // index into graph
    188 		for (f < len(graph); f += 1) {
    189 			const deps = graph[f].dependencies;
    190 			let no_deps = true;
    191 			for (let i = 0z; i < len(deps); i += 1) {
    192 				// Deleted edges in graph will be set to -1
    193 				if (deps[i] != -1) {
    194 					no_deps = false;
    195 					break;
    196 				};
    197 			};
    198 			if (no_deps && !graph[f].scanned) break;
    199 		};
    200 		if (f == len(graph)) {
    201 			// If no file without dependencies can be found -> error
    202 			fixed_fatalf("{}: circular javascript dependencies.",
    203 				entrypath);
    204 		};
    205 		// Delete dependencies for dependent files
    206 		for (let i = 0z; i < len(graph); i += 1) {
    207 			let deps = graph[i].dependencies;
    208 			for (let j = 0z; j < len(deps); j += 1) {
    209 				if (deps[j] == f) deps[j] = -1;
    210 			};
    211 		};
    212 		graph[f].scanned = true;
    213 		append(sorting, f);
    214 		// If all files are sorted -> stop
    215 		if (len(sorting) == len(graph)) return sorting;
    216 	};
    217 	return []; // will not be reached
    218 };
    219 
    220 
    221 // Resolve require() and add files to the bundle
    222 // very similar to dep_scan()
    223 fn emit_bundled(inputpath: str, ofile: io::handle, graph: dep_graph) void = {
    224 	const ifile = os::open(inputpath)!;
    225 	defer io::close(ifile)!;
    226 
    227 	// Read until require or comment or quote
    228 	for (true) {
    229 		const m = searchio::search(ifile, ofile, p_req);
    230 		if (m is size) {
    231 			const m = m: size;
    232 			if (m == 0) {
    233 				const p = read_require(ifile,
    234 					inputpath);
    235 				if (p is str) {
    236 					const p = p: str;
    237 					defer free(p);
    238 					const p = resolve_path_require(
    239 						p, inputpath);
    240 					defer free(p);
    241 					let i = 0z;
    242 					// could break if files are
    243 					// changed in race condition
    244 					for (graph[i].path != p) i += 1;
    245 					fmt::fprintf(ofile, "_tacker{}", i)!;
    246 				} else fmt::fprint(ofile, "require(")!;
    247 			} else if (m == 1) {
    248 				// "/*", "//" or "/regex/"
    249 				fmt::fprint(ofile, "/")!;
    250 				const buf: [1]u8 = [' '];
    251 				if (io::read(ifile, buf) is io::EOF) break;
    252 				io::writeall(ofile, buf)!;
    253 				if (buf[0] == '/') {
    254 					searchio::search(ifile, ofile,
    255 						p_newline);
    256 					fmt::fprint(ofile, "\n")!;
    257 				} else if (buf[0] == '*') {
    258 					searchio::search(ifile, ofile,
    259 						p_commentend);
    260 					fmt::fprint(ofile, "*/")!;
    261 				} else break;
    262 			} else {
    263 				// '"', "'" or "`"
    264 				fmt::fprint(ofile, p_req.original[m])!;
    265 				break;
    266 			};
    267 		} else break;
    268 	};
    269 	search_require(ifile, ofile, inputpath);
    270 };
    271 
    272 // Find potentially lost require() calls while piping everything from in to out.
    273 fn search_require(in: io::handle, out: io::handle, inputpath: str) void = {
    274 	const re = strings::toutf8("require("); // 7 = len(re) - 1
    275 	static let buf_ext: [4096 + 7]u8 = [0...];
    276 	let buf = buf_ext[7..];
    277 	for (true) {
    278 		match (io::read(in, buf)!) {
    279 		case let n: size => {
    280 			io::writeall(out, buf[..n])!;
    281 			if (bytes::contains(buf[0..n], re)) {
    282 				warningf("{}: file could contain skipped require() calls.",
    283 					inputpath);
    284 				break;
    285 			};
    286 			buf_ext[..7] = buf_ext[n..n+7];
    287 		};
    288 		case io::EOF => break;
    289 		};
    290 	};
    291 	io::copy(out, in)!;
    292 };
    293 
// State of a script guard stream, see script_guard().
type sgstream = struct {
	stream: io::stream,
	sink: io::handle, // destination of the escaped output
	// number of leading bytes of scriptbuf that were received but not
	// written to sink yet, because they may be the start of "</script"
	buffered: size,
};
    299 
// Stream operations of the script guard: only writing and closing are set.
const sg_vtable: io::vtable = io::vtable {
	writer = &sg_write,
	closer = &sg_close,
	...
};
    305 
    306 // Create a writeonly handle that replaces </script with <\/script
    307 fn script_guard(sink: io::handle) sgstream = {
    308 	return sgstream {
    309 		stream = &sg_vtable,
    310 		sink = sink,
    311 		buffered = 0,
    312 	};
    313 };
    314 
// The byte sequence "</script" that the script guard searches for and
// replaces.
const scriptbuf: [8]u8 = ['<', '/', 's', 'c', 'r', 'i', 'p', 't'];
    316 
    317 fn sg_write(st: *io::stream, buf_orig: const []u8) (size | io::error) = {
    318 	const st = st: *sgstream;
    319 	let buf = buf_orig;
    320 	// IDEA
    321 	// if the buffer size is smaller than the rest of the scriptbuf
    322 	// - test if the buffer fits into the rest of the scriptbuf
    323 	// - if not test if the buffer is start of a new script string
    324 	// if the buffer size is bigger or equal to the rest of the scriptbuf
    325 	// - check if the scriptbuf is finished
    326 	const scriptleft = scriptbuf[st.buffered..];
    327 	if (len(buf) < len(scriptleft)) {
    328 		if (bytes::hasprefix(scriptleft, buf)) {
    329 			st.buffered += len(buf);
    330 		} else {
    331 			io::writeall(st.sink, scriptbuf[..st.buffered])!;
    332 			if (bytes::hasprefix(scriptbuf, buf)) {
    333 				st.buffered = len(buf);
    334 			} else {
    335 				io::writeall(st.sink, buf)!;
    336 				st.buffered = 0;
    337 			};
    338 		};
    339 	} else {
    340 		if (bytes::hasprefix(buf, scriptleft)) {
    341 			buf = buf[len(scriptleft)..];
    342 			io::writeall(st.sink, strings::toutf8("<\\/script"))!;
    343 		} else {
    344 			io::writeall(st.sink, scriptbuf[..st.buffered])!;
    345 		};
    346 		let written = false;
    347 		for (let i = 1z; i < len(scriptbuf); i += 1) {
    348 			if (len(buf) > i && bytes::hassuffix(buf,
    349 					scriptbuf[..i])) {
    350 				st.buffered = i;
    351 				sg_write_single(st.sink, buf[..len(buf)-i]);
    352 				written = true;
    353 			};
    354 		};
    355 		if (!written) {
    356 			sg_write_single(st.sink, buf);
    357 			st.buffered = 0;
    358 		};
    359 	};
    360 	return len(buf_orig);
    361 };
    362 
    363 // Securely write like sg_write without checking matches across multiple writes
    364 fn sg_write_single(sink: io::handle, buf: []u8) void = {
    365 	if (bytes::contains(buf, scriptbuf)) {
    366 		let i = 0z; // number of bytes written
    367 		for (i <= len(buf) - len(scriptbuf)) {
    368 			if (bytes::equal(buf[i..i+len(scriptbuf)], scriptbuf)) {
    369 				io::writeall(sink,
    370 					strings::toutf8("<\\/script"))!;
    371 				i += len(scriptbuf);
    372 			} else {
    373 				io::writeall(sink, buf[i..i+1])!;
    374 				i += 1;
    375 			};
    376 		};
    377 		io::writeall(sink, buf[i..])!;
    378 	} else io::writeall(sink, buf)!;
    379 };
    380 
    381 fn sg_close(st: *io::stream) (void | io::error) = {
    382 	const st = st: *sgstream;
    383 	if (st.buffered != 0) io::writeall(st.sink, scriptbuf[..st.buffered])!;
    384 };