commit 181a2fb16d2809d57841b2d8f649f2b341b5f40b
parent b72c2c33a1df9d77d67fb77d52ea7c67a02e379f
Author: tongong <tongong@gmx.net>
Date: Fri, 15 Jul 2022 21:34:39 +0200
added js require scanning
Diffstat:
8 files changed, 296 insertions(+), 106 deletions(-)
diff --git a/README.md b/README.md
@@ -71,10 +71,10 @@ correctly recognizing regex literals as they could contain quote characters and
as far as I know this requires parsing the whole AST (how to decide if `/5/` is
a regex or part of an arithmetic expression?). A similar problem arises for
template literals. To avoid this complexity `tacker` only reads until reaching
-the first string, regex or template literal. This means that module imports
-have to be at the top of each source file which is the case already for most
-projects. All potentially skipped `require()` calls will be announced as a
-warning.
+the first character that could be the start of a string, regex or template
+literal. This means that module imports have to be at the top of each source
+file, which is already the case for most projects. All potentially skipped
+`require()` calls will be announced as a warning.
### script end tags & regex literals
When inlining javascript in html, the script cannot contain script end tags
diff --git a/bundle_html.ha b/bundle_html.ha
@@ -47,7 +47,7 @@ fn tacker_html(inputpath: str, ofile: io::handle) void = {
if (src is not_found) {
io::write(ofile, tagbuf.buf)!;
} else {
- searchio::search(ifile, &tagbuf,
+ searchio::search(ifile, black_hole,
p_scriptclose);
const src = src: tag_split;
const src = strings::fromutf8(src.1);
diff --git a/bundle_js.ha b/bundle_js.ha
@@ -1,18 +1,181 @@
+use bufio;
use fmt;
use io;
use os;
+use searchio;
+use strings;
+
+// STRATEGY
+// Two passes are needed:
+// - First pass: Scan the source files and create a dependency graph
+// - Second pass: Bundle all needed files
+
+// One node of the dependency graph: a single javascript source file.
+type jsfile = struct {
+ // path of the file; dep_add() stores a duplicate here that is released
+ // again by dep_graph_free()
+ path: str,
+ dependencies: []size, // indices into dep_graph of the files required by this one
+ scanned: bool, // if the file was scanned for its dependencies already
+};
+
+// The dependency graph as a flat list of files. Edges are not pointers but
+// indices into this list, stored in the dependencies field of each jsfile.
+type dep_graph = []jsfile;
+
+// Release a whole dependency graph: the path and the dependency list of every
+// node first, then the list of nodes itself.
+fn dep_graph_free(g: dep_graph) void = {
+	let node = 0z;
+	for (node < len(g)) {
+		free(g[node].path);
+		free(g[node].dependencies);
+		node += 1;
+	};
+	free(g);
+};
// html: true if the output can be inlined in a html script tag. This is
// important because code like e.g.
// let tag = "</script>";
// has to be escaped.
+// inputs are borrowed
fn tacker_js(inputpath: str, ofile: io::handle, html: bool) void = {
+	let g: dep_graph = [];
+	// dep_graph_free() is the one and only cleanup of the graph. Freeing the
+	// dependency lists and the node list by hand in addition to it releases
+	// the same memory twice, so do not add manual frees here.
+	defer dep_graph_free(g);
+	// First pass: build the dependency graph, starting at the entry file.
+	dep_add(void, inputpath, &g);
+	// TODO: second pass (bundling into ofile). Until then the graph is only
+	// printed, one line per file: "index: path - dep,dep,"
+	for (let i = 0z; i < len(g); i += 1) {
+		fmt::printf("{}: {} - ", i, g[i].path)!;
+		const dep = g[i].dependencies;
+		for (let j = 0z; j < len(dep); j += 1) {
+			fmt::printf("{},", dep[j])!;
+		};
+		fmt::println("")!;
+	};
+};
+
+// Search patterns used while scanning javascript sources. The values given
+// here are placeholders: the patterns are compiled in init() and released in
+// fini().
+let p_req: searchio::pattern = searchio::pattern {...};
+let p_newline: searchio::pattern = searchio::pattern {...};
+let p_commentend: searchio::pattern = searchio::pattern {...};
+let p_quotedouble: searchio::pattern = searchio::pattern {...};
+let p_quotesingle: searchio::pattern = searchio::pattern {...};
+
+// Compile all search patterns once at program start.
+@init fn init() void = {
+	// single-needle patterns used to skip comments and to read string
+	// literals
+	p_quotesingle = searchio::compile(["'"]);
+	p_quotedouble = searchio::compile(["\""]);
+	p_commentend = searchio::compile(["*/"]);
+	p_newline = searchio::compile(["\n"]);
+	// The order of the needles matters: dep_scan() compares the search
+	// result against 0 for "require(" and against 1 for "/".
+	// "/" has to be recognized as regex literal or comment start
+	p_req = searchio::compile(["require(", "/", "\"", "'", "`"]);
+};
+
+// Release the search patterns compiled in init() at program exit.
+@fini fn fini() void = {
+	searchio::finish(p_quotesingle);
+	searchio::finish(p_quotedouble);
+	searchio::finish(p_commentend);
+	searchio::finish(p_newline);
+	searchio::finish(p_req);
+};
+
+// Add a connection frompath -> deppath to the dependency graph.
+//
+// deppath is added as a new node if the graph does not contain it yet, and it
+// is scanned for its own dependencies (via dep_scan) the first time it is
+// seen. frompath is void for the entry file, which has no requiring file; in
+// that case only the node is added and scanned.
+//
+// inputs are borrowed
+fn dep_add(frompath: (str | void), deppath: str, graph: *dep_graph) void = {
+ // g is a local copy of the slice; it is only written back to *graph
+ // further down, right before the recursion.
+ const g = *graph;
+ // look up deppath; depindex == len(g) means "not in the graph yet"
+ let depindex = 0z;
+ for (depindex < len(g) && g[depindex].path != deppath) depindex += 1;
+ if (depindex == len(g)) {
+ // the node keeps its own copy of the path because deppath is borrowed
+ append(g, jsfile {
+ path = strings::dup(deppath),
+ dependencies = [],
+ scanned = false
+ });
+ };
+ // add link to the graph
+ if (frompath is str) {
+ const frompath = frompath: str;
+ let fromindex = 0z;
+ for (fromindex < len(g) && g[fromindex].path != frompath)
+ fromindex += 1;
+ // NOTE(review): assumes frompath is already part of the graph; if it
+ // is not, fromindex == len(g) and this index is out of range.
+ append(g[fromindex].dependencies, depindex);
+ };
+ // scan deppath if necessary
+ if (g[depindex].scanned == false) {
+ // Mark the node before recursing so that circular requires do not
+ // scan the same file again.
+ g[depindex].scanned = true;
+ // Publish the (possibly grown) slice before dep_scan() works on
+ // *graph. A node appended above always has scanned == false, so
+ // every append to g passes through this write-back.
+ *graph = g;
+ dep_scan(deppath, graph);
+ };
+};
+
+// Scan the javascript file at inputpath for require() calls and register every
+// required file with dep_add(), which scans files it has not seen before
+// (hence the recursion).
+//
+// The scan is deliberately shallow: "//" and "/* */" comments are skipped, but
+// once a character that could start a string, regex or template literal was
+// seen, require() calls are no longer followed. The first require( found after
+// that point only produces a warning on stderr and ends the scan.
+//
+// inputs are borrowed
+fn dep_scan(inputpath: str, graph: *dep_graph) void = {
const ifile = os::open(inputpath)!;
defer io::close(ifile)!;
- // TODO
+ // Read until require or comment or quote
+ // set once the start of a string literal etc was found (disabled require)
+ let disabled = false;
for (true) {
- let buf: [1]u8 = [' '];
- if (io::read(ifile, buf) is io::EOF) return;
- io::write(ofile, buf)!;
+ // The skipped input is discarded. A size result is treated below as
+ // the index of the matched needle of p_req (0: "require(", 1: "/",
+ // anything else: a quote character); any other result ends the scan.
+ const m = searchio::search(ifile, black_hole, p_req);
+ if (m is size) {
+ if (m == 0) {
+ if (disabled == false) {
+ // read_require() returns no if the match is only the
+ // tail of a longer identifier; such a match is ignored
+ const p = read_require(ifile,
+ inputpath);
+ if (p is str) {
+ // raw argument of the require() call
+ const p = p: str;
+ defer free(p);
+ // resolved path (shadows the raw argument)
+ const p = resolve_path_require(
+ p, inputpath);
+ defer free(p);
+ dep_add(inputpath, p, graph);
+ };
+ } else {
+ fmt::fprintfln(os::stderr, "file \"{}\" could contain skipped require() calls.", inputpath)!;
+ break;
+ };
+ } else if (m == 1) {
+ // "/*", "//" or "/regex/"
+ // the byte after the "/" decides which of the three it is
+ const buf: [1]u8 = [' '];
+ if (io::read(ifile, buf) is io::EOF) break;
+ if (buf[0] == '/') {
+ // line comment: skip to the end of the line
+ searchio::search(ifile, black_hole,
+ p_newline);
+ } else if (buf[0] == '*') {
+ // block comment: skip to the closing "*/"
+ searchio::search(ifile, black_hole,
+ p_commentend);
+ } else disabled = true;
+ } else {
+ // '"', "'" or "`"
+ disabled = true;
+ };
+ } else break;
+ };
+};
+
+// Returned by read_require() if the matched "require(" is only the tail of a
+// longer identifier (like my_require()) and therefore not a module import.
+type no = void;
+
+// Parse the argument of a require() call and return the required file path.
+//
+// Has to be called with the read position of "in" directly behind a matched
+// "require(". Returns no if the match is only the tail of a longer identifier
+// like my_require(); the read position is then left directly behind the
+// "require(" again. Aborts via fixed_fatalf if the argument is not one quoted
+// string literal that is immediately followed by ")".
+//
+// path is the name of the scanned file and only used for the error message.
+// Return value has to be freed.
+fn read_require(in: io::handle, path: str) (str | no) = {
+	let buf: [1]u8 = [' '];
+	// "require(" is 8 bytes long, so an offset of 8 means that the match
+	// sits at the very start of the file. There is no preceding byte to
+	// inspect in that case, and seeking back 9 bytes would fail.
+	const pos = io::seek(in, 0, io::whence::CUR)!;
+	if (pos > 8) {
+		// Check if require() is part of another identifier like
+		// my_require(): step back onto the byte in front of the match,
+		// read it, then skip the 8 bytes of "require(" again.
+		io::seek(in, -9, io::whence::CUR)!;
+		io::read(in, buf)!;
+		io::seek(in, 8, io::whence::CUR)!;
+		// this weird string contains all characters that are allowed in
+		// a js source file but not in an identifier; tab, CR and LF are
+		// part of it so that a require() at the start of a line is
+		// recognized as well
+		if (!strings::contains(" \t\r\n!%&()*+,-./:;<=>?[]^{|}~",
+				buf[0]: u32: rune))
+			return no;
+	};
+
+	// The argument has to start with a quote; the same kind of quote ends it.
+	io::read(in, buf)!;
+	if (buf[0] == '"' || buf[0] == '\'') {
+		let namebuf = bufio::dynamic(io::mode::WRITE);
+		const pattern = if (buf[0] == '\'') p_quotesingle
+			else p_quotedouble;
+		// everything up to the closing quote is collected in namebuf
+		searchio::search(in, &namebuf, pattern);
+		let ret = strings::fromutf8(namebuf.buf);
+		io::read(in, buf)!;
+		if (buf[0] == ')') return ret;
};
+	fixed_fatalf("{}: broken require() call", path);
+	return ""; // will not be reached
};
diff --git a/helpers.ha b/helpers.ha
@@ -0,0 +1,119 @@
+use fs;
+use io;
+use os;
+use slices;
+use strings;
+
+// All bundled files must be within this directory so that malicious modules
+// cannot require arbitrary files on the file system.
+// NOTE(review): resolve_path() joins basepath and a path without a separator
+// and uses basepath for a plain prefix check, so it presumably has to end with
+// "/" - confirm where it is assigned.
+let basepath: str = "";
+// Release basepath at program exit.
+// NOTE(review): assumes basepath was replaced by a heap-allocated string by
+// then - confirm that freeing the initial "" is harmless.
+@fini fn fini() void = free(basepath);
+
+// Cut a string after its last "/", keeping the slash itself. If the path
+// contains no "/" at all (or is empty), the empty string is returned, so that
+// appending a relative path to the result still yields a usable path.
+// Return value is borrowed from the input.
+fn parent_dir(path: str) str = {
+	const bytes = strings::toutf8(path);
+	// end is the length of the prefix to keep. Counting down a length
+	// instead of an index keeps the unsigned value from wrapping around
+	// when there is no "/" to stop at.
+	let end = len(bytes);
+	for (end > 0 && bytes[end - 1] != '/') end -= 1;
+	// "/" is a single ASCII byte, so the cut never splits a UTF-8 sequence.
+	return strings::fromutf8(bytes[..end]);
+};
+
+// Run path through os::realpath and the result through os::resolve. Aborts via
+// fixed_fatalf if os::realpath reports an error.
+// NOTE(review): resolve_path() duplicates the result before keeping it, so the
+// return value is presumably borrowed - confirm against the os module.
+fn realpath_resolve(path: str) str = {
+	const real = match (os::realpath(path)) {
+	case let found: str =>
+		yield found;
+	case fs::error =>
+		fixed_fatalf("path \"{}\" does not exist.", path);
+		yield ""; // unreachable
+	};
+	return os::resolve(real);
+};
+
+// Resolve a reference found in a source file to a real path inside basepath.
+// path: to be resolved
+// from: path to the file (or directory) where the reference was found.
+// Aborts via fixed_fatalf for http(s) URLs and for paths that resolve to a
+// location outside of basepath.
+// Return value has to be freed.
+fn resolve_path(path: str, from: str) str = {
+	const external = strings::hasprefix(path, "http://") ||
+		strings::hasprefix(path, "https://");
+	if (external) {
+		fixed_fatalf("bundling of external resources is not allowed: \"{}\".",
+			path);
+	};
+	// Paths starting with "./" or "../" are relative to the directory of
+	// the referencing file, everything else is relative to basepath. Either
+	// way the directory has to end with "/" because it is joined with path
+	// without a separator.
+	const relative = strings::hasprefix(path, "./") ||
+		strings::hasprefix(path, "../");
+	const base = if (relative) parent_dir(from) else basepath;
+	const joined = strings::join("", base, path);
+	defer free(joined);
+	const resolved = strings::dup(realpath_resolve(joined));
+	if (!strings::hasprefix(resolved, basepath)) {
+		fixed_fatalf("file path \"{}\" violates the base path \"{}\".",
+			resolved, basepath);
+	};
+	return resolved;
+};
+
+// Resolve the argument of a require() call: works like resolve_path(), but a
+// path that does not end in ".js" gets that extension appended first.
+// Return value has to be freed.
+fn resolve_path_require(path: str, from: str) str = {
+	if (strings::hassuffix(path, ".js")) return resolve_path(path, from);
+	// the extended path is only needed for the lookup itself
+	const with_ext = strings::join("", path, ".js");
+	defer free(with_ext);
+	return resolve_path(with_ext, from);
+};
+
+// Return the byte index of the last dot in the file name part of a path, or -1
+// if there is none. The search runs backwards and stops at the first "/", so
+// dots in directory names do not count.
+fn lastdotindex(filename: str) int = {
+	const bytes = strings::toutf8(filename);
+	for (let pos = len(bytes); pos > 0; pos -= 1) {
+		const c = bytes[pos - 1];
+		if (c == '.') return (pos - 1): int;
+		if (c == '/') return -1;
+	};
+	return -1;
+};
+
+// Insert ".bundle" in front of the extension of filename, or at its end if it
+// has no extension: "test.js" becomes "test.bundle.js" and "no-ext" becomes
+// "no-ext.bundle".
+// Return value has to be freed.
+fn file_name_bundled(filename: str) str = {
+	const bytes = strings::toutf8(filename);
+	const lastdot = lastdotindex(filename);
+	// files without extension get the .bundle at the end
+	const cut = if (lastdot == -1) len(bytes) else lastdot: size;
+	// cut is either the end of the string or the position of an ASCII ".",
+	// so both halves are valid UTF-8 on their own.
+	return strings::concat(strings::fromutf8(bytes[..cut]), ".bundle",
+		strings::fromutf8(bytes[cut..]));
+};
+
+// Covers a plain extension, several dots, no extension at all, a dot that only
+// appears in a directory name and a file name with a multi-byte character.
+@test fn file_name_bundled() void = {
+ assert(file_name_bundled("test.js") == "test.bundle.js");
+ assert(file_name_bundled("test.dot.js") == "test.dot.bundle.js");
+ assert(file_name_bundled("no-ext") == "no-ext.bundle");
+ assert(file_name_bundled("./dir.a/no-ext") == "./dir.a/no-ext.bundle");
+ assert(file_name_bundled("./test.dir/ütf8.html") ==
+ "./test.dir/ütf8.bundle.html");
+};
+
+// A file to write useless output to (like /dev/null)
+// black_hole is the handle to pass around; it refers to the stream
+// black_hole_s, which in turn refers to the vtable black_hole_v.
+const black_hole: io::handle = &black_hole_s;
+const black_hole_s: io::stream = &black_hole_v;
+// Only writing is provided: reader is null and the writer is
+// black_hole_write().
+const black_hole_v: io::vtable = io::vtable {
+ reader = null,
+ writer = &black_hole_write,
+ ...
+};
+// Writer of the black hole stream: ignores the data and reports all of buf as
+// written.
+fn black_hole_write(s: *io::stream, buf: const []u8) (size | io::error) =
+	len(buf);
diff --git a/path_helpers.ha b/path_helpers.ha
@@ -1,94 +0,0 @@
-use fs;
-use os;
-use slices;
-use strings;
-
-// All bundled files must be within this directory so that malicious modules
-// cannot require arbitrary files on the file system.
-let basepath: str = "";
-@fini fn fini() void = free(basepath);
-
-// Cuts a string to the last "/".
-// Return value is borrowed from the input.
-fn parent_dir(path: str) str = {
- const bytes = strings::toutf8(path);
- let i = len(bytes) - 1;
- for (bytes[i] != '/') i -= 1;
- return strings::fromutf8(bytes[..(i+1)]);
-};
-
-// Applies os::realpath and os::resolve.
-fn realpath_resolve(path: str) str = {
- const p = match (os::realpath(path)) {
- case let p: str => yield p;
- case let p: fs::error =>
- fixed_fatalf("path \"{}\" does not exist.", path);
- yield ""; // unreachable
- };
- return os::resolve(p);
-};
-
-// path: to be resolved
-// from: path to the file (or directory) where the reference was found.
-// Return value has to be freed.
-fn resolve_path(path: str, from: str) str = {
- if (strings::hasprefix(path, "http://") ||
- strings::hasprefix(path, "https://")) {
- fixed_fatalf("bundling of external resources is not allowed: \"{}\".",
- path);
- };
- // directory path is relativ to base
- // ends with "/"
- const base = if (strings::hasprefix(path, "./") ||
- strings::hasprefix(path, "../")) {
- yield parent_dir(from);
- } else {
- yield basepath;
- };
- const r = strings::join("", base, path);
- defer free(r);
- const r = strings::dup(realpath_resolve(r));
- if (!strings::hasprefix(r, basepath))
- fixed_fatalf("file path \"{}\" violates the base path \"{}\".",
- r, basepath);
- return r;
-};
-
-// Returns index of the last dot in the filename or -1 if the file contains no
-// dot.
-fn lastdotindex(filename: str) int = {
- const filename = strings::toutf8(filename);
- let index = (len(filename) - 1): int;
- for (index >= 0 && filename[index] != '.') {
- if (filename[index] == '/') return -1;
- index -= 1;
- };
- return index;
-};
-
-// return value has to be freed.
-fn file_name_bundled(filename: str) str = {
- let lastdot = lastdotindex(filename);
- // files without extension get the .bundle at the end
- if (lastdot == -1) lastdot = len(filename): int;
-
- const output = strings::dup(filename);
- const output = strings::toutf8(output);
-
- const ext = strings::toutf8(".bundle");
- let bptr: [7]*void = [&ext: *void ...];
- for (let i = 0z; i < len(ext); i += 1) {
- bptr[i] = &ext[i];
- };
- slices::insertinto(&output: *[]void, size(u8), lastdot: size, bptr...);
- return strings::fromutf8(output);
-};
-
-@test fn file_name_bundled() void = {
- assert(file_name_bundled("test.js") == "test.bundle.js");
- assert(file_name_bundled("test.dot.js") == "test.dot.bundle.js");
- assert(file_name_bundled("no-ext") == "no-ext.bundle");
- assert(file_name_bundled("./dir.a/no-ext") == "./dir.a/no-ext.bundle");
- assert(file_name_bundled("./test.dir/ütf8.html") ==
- "./test.dir/ütf8.bundle.html");
-};
diff --git a/test-page/a.js b/test-page/a.js
@@ -1,4 +1,4 @@
-// let testm = require("./b.js")
+let testm = require("./b.js")
// console.log(testm.hello());
let r = "this require('b.js') will not be macro-expanded.";
diff --git a/test-page/b.js b/test-page/b.js
@@ -1,4 +1,6 @@
+const c = require('./c');
+
module.exports = {
hello: () => ":)",
- c: require("./c"),
+ c,
}
diff --git a/test-page/c.js b/test-page/c.js
@@ -1,2 +1,2 @@
-console.log(require("./a.js"));
+console.log(require("./a.js")); // illegal -> circular dependency
exports.msg = ":)";