tacker

a simple web bundler
git clone https://tongong.net/git/tacker.git
Log | Files | Refs | README

commit 181a2fb16d2809d57841b2d8f649f2b341b5f40b
parent b72c2c33a1df9d77d67fb77d52ea7c67a02e379f
Author: tongong <tongong@gmx.net>
Date:   Fri, 15 Jul 2022 21:34:39 +0200

added js require scanning

Diffstat:
M README.md | 8 ++++----
M bundle_html.ha | 2 +-
M bundle_js.ha | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
A helpers.ha | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D path_helpers.ha | 94 -------------------------------------------------------------------------------
M test-page/a.js | 2 +-
M test-page/b.js | 4 +++-
M test-page/c.js | 2 +-
8 files changed, 296 insertions(+), 106 deletions(-)

diff --git a/README.md b/README.md @@ -71,10 +71,10 @@ correctly recognizing regex literals as they could contain quote characters and as far as I know this requires parsing the whole AST (how to decide if `/5/` is a regex or part of an arithmetic expression?). A similar problem arises for template literals. To avoid this complexity `tacker` only reads until reaching -the first string, regex or template literal. This means that module imports -have to be at the top of each source file which is the case already for most -projects. All potentially skipped `require()` calls will be announced as a -warning. +the first character that could be start of a string, regex or template literal. +This means that module imports have to be at the top of each source file which +is the case already for most projects. All potentially skipped `require()` +calls will be announced as a warning. ### script end tags & regex literals When inlining javascript in html, the script cannot contain script end tags diff --git a/bundle_html.ha b/bundle_html.ha @@ -47,7 +47,7 @@ fn tacker_html(inputpath: str, ofile: io::handle) void = { if (src is not_found) { io::write(ofile, tagbuf.buf)!; } else { - searchio::search(ifile, &tagbuf, + searchio::search(ifile, black_hole, p_scriptclose); const src = src: tag_split; const src = strings::fromutf8(src.1); diff --git a/bundle_js.ha b/bundle_js.ha @@ -1,18 +1,181 @@ +use bufio; use fmt; use io; use os; +use searchio; +use strings; + +// STRATEGY +// Two passes are needed: +// - First pass: Scan the source files and create a dependency graph +// - Second pass: Bundle all needed files + +type jsfile = struct { + path: str, + dependencies: []size, // indizes into dep_graph + scanned: bool, // if the file was scanned for its dependencies already +}; + +type dep_graph = []jsfile; + +fn dep_graph_free(g: dep_graph) void = { + for (let i = 0z; i < len(g); i += 1) { + free(g[i].path); + free(g[i].dependencies); + }; + free(g); +}; // html: true if the output 
can be inlined in a html script tag. This is // important because code like e.g. // let tag = "</script>"; // has to be escaped. +// inputs are borrowed fn tacker_js(inputpath: str, ofile: io::handle, html: bool) void = { + let g: dep_graph = []; + defer { + for (let i = 0z; i < len(g); i += 1) + free(g[i].dependencies); + free(g); + }; + dep_add(void, inputpath, &g); + // TODO + for (let i = 0z; i < len(g); i += 1) { + fmt::printf("{}: {} - ", i, g[i].path)!; + const dep = g[i].dependencies; + for (let j = 0z; j < len(dep); j += 1) { + fmt::printf("{},", dep[j])!; + }; + fmt::println("")!; + }; + dep_graph_free(g); +}; + +let p_req: searchio::pattern = searchio::pattern {...}; +let p_newline: searchio::pattern = searchio::pattern {...}; +let p_commentend: searchio::pattern = searchio::pattern {...}; +let p_quotedouble: searchio::pattern = searchio::pattern {...}; +let p_quotesingle: searchio::pattern = searchio::pattern {...}; + +@init fn init() void = { + // "/" has to be recognized as regex literal or comment start + p_req = searchio::compile(["require(", "/", "\"", "'", "`"]); + p_newline = searchio::compile(["\n"]); + p_commentend = searchio::compile(["*/"]); + p_quotedouble = searchio::compile(["\""]); + p_quotesingle = searchio::compile(["'"]); +}; + +@fini fn fini() void = { + defer searchio::finish(p_req); + defer searchio::finish(p_newline); + defer searchio::finish(p_commentend); + defer searchio::finish(p_quotedouble); + defer searchio::finish(p_quotesingle); +}; + +// Add a connection frompath -> deppath to the dependency graph +// inputs are borrowed +fn dep_add(frompath: (str | void), deppath: str, graph: *dep_graph) void = { + const g = *graph; + let depindex = 0z; + for (depindex < len(g) && g[depindex].path != deppath) depindex += 1; + if (depindex == len(g)) { + append(g, jsfile { + path = strings::dup(deppath), + dependencies = [], + scanned = false + }); + }; + // add link to the graph + if (frompath is str) { + const frompath = frompath: str; 
+ let fromindex = 0z; + for (fromindex < len(g) && g[fromindex].path != frompath) + fromindex += 1; + append(g[fromindex].dependencies, depindex); + }; + // scan deppath if neccessarry + if (g[depindex].scanned == false) { + g[depindex].scanned = true; + *graph = g; + dep_scan(deppath, graph); + }; +}; + +// Recursively scan and add a file to the dependency graph +// inputs are borrowed +fn dep_scan(inputpath: str, graph: *dep_graph) void = { const ifile = os::open(inputpath)!; defer io::close(ifile)!; - // TODO + // Read until require or comment or quote + // if start of string literal etc was found (disabled require) + let disabled = false; for (true) { - let buf: [1]u8 = [' ']; - if (io::read(ifile, buf) is io::EOF) return; - io::write(ofile, buf)!; + const m = searchio::search(ifile, black_hole, p_req); + if (m is size) { + if (m == 0) { + if (disabled == false) { + const p = read_require(ifile, + inputpath); + if (p is str) { + const p = p: str; + defer free(p); + const p = resolve_path_require( + p, inputpath); + defer free(p); + dep_add(inputpath, p, graph); + }; + } else { + fmt::fprintfln(os::stderr, "file \"{}\" could contain skipped require() calls.", inputpath)!; + break; + }; + } else if (m == 1) { + // "/*", "//" or "/regex/" + const buf: [1]u8 = [' ']; + if (io::read(ifile, buf) is io::EOF) break; + if (buf[0] == '/') { + searchio::search(ifile, black_hole, + p_newline); + } else if (buf[0] == '*') { + searchio::search(ifile, black_hole, + p_commentend); + } else disabled = true; + } else { + // '"', "'" or "`" + disabled = true; + }; + } else break; + }; +}; + +// Is returned if the require() is part of a longer identifier +type no = void; + +// Parse the contents of a require() macro and return the file path. +// Return value has to be freed. 
+fn read_require(in: io::handle, path: str) (str | no) = { + // Check if require() is part of another identifier like my_require() + io::seek(in, -9, io::whence::CUR)!; + const buf: [1]u8 = [' ']; + io::read(in, buf)!; + io::seek(in, 8, io::whence::CUR)!; + // this weird string contains all characters that are allowed in a js + // source file but not in an identifier + if (!strings::contains(" !%&()*+,-./:;<=>?[]^{|}~", buf[0]: u32: rune)) + return no; + + io::read(in, buf)!; + let broken = false; + if (buf[0] == '"' || buf[0] == '\'') { + let namebuf = bufio::dynamic(io::mode::WRITE); + const pattern = if (buf[0] == '\'') p_quotesingle + else p_quotedouble; + searchio::search(in, &namebuf, pattern); + let ret = strings::fromutf8(namebuf.buf); + io::read(in, buf)!; + if (buf[0] == ')') return ret; }; + fixed_fatalf("{}: broken require() call", path); + return ""; // will not be reached }; diff --git a/helpers.ha b/helpers.ha @@ -0,0 +1,119 @@ +use fs; +use io; +use os; +use slices; +use strings; + +// All bundled files must be within this directory so that malicious modules +// cannot require arbitrary files on the file system. +let basepath: str = ""; +@fini fn fini() void = free(basepath); + +// Cut a string to the last "/". +// Return value is borrowed from the input. +fn parent_dir(path: str) str = { + const bytes = strings::toutf8(path); + let i = len(bytes) - 1; + for (bytes[i] != '/') i -= 1; + return strings::fromutf8(bytes[..(i+1)]); +}; + +// Apply os::realpath and os::resolve. +fn realpath_resolve(path: str) str = { + const p = match (os::realpath(path)) { + case let p: str => yield p; + case let p: fs::error => + fixed_fatalf("path \"{}\" does not exist.", path); + yield ""; // unreachable + }; + return os::resolve(p); +}; + +// path: to be resolved +// from: path to the file (or directory) where the reference was found. +// Return value has to be freed. 
+fn resolve_path(path: str, from: str) str = { + if (strings::hasprefix(path, "http://") || + strings::hasprefix(path, "https://")) { + fixed_fatalf("bundling of external resources is not allowed: \"{}\".", + path); + }; + // directory path is relativ to base + // ends with "/" + const base = if (strings::hasprefix(path, "./") || + strings::hasprefix(path, "../")) { + yield parent_dir(from); + } else { + yield basepath; + }; + const r = strings::join("", base, path); + defer free(r); + const r = strings::dup(realpath_resolve(r)); + if (!strings::hasprefix(r, basepath)) + fixed_fatalf("file path \"{}\" violates the base path \"{}\".", + r, basepath); + return r; +}; + +// Works like resolve_path() but adds a .js extension if there is none +fn resolve_path_require(path: str, from: str) str = { + return if (strings::hassuffix(path, ".js")) + resolve_path(path, from) + else { + const p = strings::join("", path, ".js"); + const res = resolve_path(p, from); + free(p); + yield res; + }; +}; + +// Return index of the last dot in the filename or -1 if the file contains no +// dot. +fn lastdotindex(filename: str) int = { + const filename = strings::toutf8(filename); + let index = (len(filename) - 1): int; + for (index >= 0 && filename[index] != '.') { + if (filename[index] == '/') return -1; + index -= 1; + }; + return index; +}; + +// return value has to be freed. 
+fn file_name_bundled(filename: str) str = { + let lastdot = lastdotindex(filename); + // files without extension get the .bundle at the end + if (lastdot == -1) lastdot = len(filename): int; + + const output = strings::dup(filename); + const output = strings::toutf8(output); + + const ext = strings::toutf8(".bundle"); + let bptr: [7]*void = [&ext: *void ...]; + for (let i = 0z; i < len(ext); i += 1) { + bptr[i] = &ext[i]; + }; + slices::insertinto(&output: *[]void, size(u8), lastdot: size, bptr...); + return strings::fromutf8(output); +}; + +@test fn file_name_bundled() void = { + assert(file_name_bundled("test.js") == "test.bundle.js"); + assert(file_name_bundled("test.dot.js") == "test.dot.bundle.js"); + assert(file_name_bundled("no-ext") == "no-ext.bundle"); + assert(file_name_bundled("./dir.a/no-ext") == "./dir.a/no-ext.bundle"); + assert(file_name_bundled("./test.dir/ütf8.html") == + "./test.dir/ütf8.bundle.html"); +}; + +// A file to write useless output to (like /dev/null) +const black_hole: io::handle = &black_hole_s; +const black_hole_s: io::stream = &black_hole_v; +const black_hole_v: io::vtable = io::vtable { + reader = null, + writer = &black_hole_write, + ... +}; +fn black_hole_write(s: *io::stream, buf: const []u8) (size | io::error) = { + return len(buf); +}; diff --git a/path_helpers.ha b/path_helpers.ha @@ -1,94 +0,0 @@ -use fs; -use os; -use slices; -use strings; - -// All bundled files must be within this directory so that malicious modules -// cannot require arbitrary files on the file system. -let basepath: str = ""; -@fini fn fini() void = free(basepath); - -// Cuts a string to the last "/". -// Return value is borrowed from the input. -fn parent_dir(path: str) str = { - const bytes = strings::toutf8(path); - let i = len(bytes) - 1; - for (bytes[i] != '/') i -= 1; - return strings::fromutf8(bytes[..(i+1)]); -}; - -// Applies os::realpath and os::resolve. 
-fn realpath_resolve(path: str) str = { - const p = match (os::realpath(path)) { - case let p: str => yield p; - case let p: fs::error => - fixed_fatalf("path \"{}\" does not exist.", path); - yield ""; // unreachable - }; - return os::resolve(p); -}; - -// path: to be resolved -// from: path to the file (or directory) where the reference was found. -// Return value has to be freed. -fn resolve_path(path: str, from: str) str = { - if (strings::hasprefix(path, "http://") || - strings::hasprefix(path, "https://")) { - fixed_fatalf("bundling of external resources is not allowed: \"{}\".", - path); - }; - // directory path is relativ to base - // ends with "/" - const base = if (strings::hasprefix(path, "./") || - strings::hasprefix(path, "../")) { - yield parent_dir(from); - } else { - yield basepath; - }; - const r = strings::join("", base, path); - defer free(r); - const r = strings::dup(realpath_resolve(r)); - if (!strings::hasprefix(r, basepath)) - fixed_fatalf("file path \"{}\" violates the base path \"{}\".", - r, basepath); - return r; -}; - -// Returns index of the last dot in the filename or -1 if the file contains no -// dot. -fn lastdotindex(filename: str) int = { - const filename = strings::toutf8(filename); - let index = (len(filename) - 1): int; - for (index >= 0 && filename[index] != '.') { - if (filename[index] == '/') return -1; - index -= 1; - }; - return index; -}; - -// return value has to be freed. 
-fn file_name_bundled(filename: str) str = { - let lastdot = lastdotindex(filename); - // files without extension get the .bundle at the end - if (lastdot == -1) lastdot = len(filename): int; - - const output = strings::dup(filename); - const output = strings::toutf8(output); - - const ext = strings::toutf8(".bundle"); - let bptr: [7]*void = [&ext: *void ...]; - for (let i = 0z; i < len(ext); i += 1) { - bptr[i] = &ext[i]; - }; - slices::insertinto(&output: *[]void, size(u8), lastdot: size, bptr...); - return strings::fromutf8(output); -}; - -@test fn file_name_bundled() void = { - assert(file_name_bundled("test.js") == "test.bundle.js"); - assert(file_name_bundled("test.dot.js") == "test.dot.bundle.js"); - assert(file_name_bundled("no-ext") == "no-ext.bundle"); - assert(file_name_bundled("./dir.a/no-ext") == "./dir.a/no-ext.bundle"); - assert(file_name_bundled("./test.dir/ütf8.html") == - "./test.dir/ütf8.bundle.html"); -}; diff --git a/test-page/a.js b/test-page/a.js @@ -1,4 +1,4 @@ -// let testm = require("./b.js") +let testm = require("./b.js") // console.log(testm.hello()); let r = "this require('b.js') will not be macro-expanded."; diff --git a/test-page/b.js b/test-page/b.js @@ -1,4 +1,6 @@ +const c = require('./c'); + module.exports = { hello: () => ":)", - c: require("./c"), + c, } diff --git a/test-page/c.js b/test-page/c.js @@ -1,2 +1,2 @@ -console.log(require("./a.js")); +console.log(require("./a.js")); // illegal -> circular dependency exports.msg = ":)";