tacker

a simple web bundler
git clone https://tongong.net/git/tacker.git
Log | Files | Refs | README

commit f1167ebb146b2ae054eacdf77ac15ce48aa8d281
parent 261c39eb69b74678c8249838fec1c7cd4a8d859d
Author: tongong <tongong@gmx.net>
Date:   Sun, 29 May 2022 11:53:16 +0200

file path resolving functions

Diffstat:
M README.md | 7 ++++++-
M tacker.ha | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
2 files changed, 141 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md @@ -24,7 +24,7 @@ language. - binary data as base64 (e.g. `background-image: url(...)`) - JS - a subset of CommonJS modules - - `require(...)` with relative + - `require(...)` - `module.exports` and `exports` - binary data as base64 through custom `requireBinary(...)` function @@ -39,3 +39,8 @@ complete execution of the program at bundle-time to be able to reason about possible aliases to `require`. This is impossible and thus `require()` will be treated as special syntax. This implementation is thus wrong but should work for every sane usage of `require()`. + +The "conceptual module name space root" is the working directory. This means +that required paths which are not relative are resolved from the cwd. For +security reasons only files in the cwd can be bundled. This can be changed with +the `-p` option. Input and output file name stay relative to the cwd. diff --git a/tacker.ha b/tacker.ha @@ -1,7 +1,8 @@ -use encoding::hex; use encoding::utf8; use fmt; +use fs; use getopt; +use io; use os; use rt; use slices; @@ -27,6 +28,20 @@ fn runes_to_str(runes: []rune) str = { return *(&s: *const str); }; +// Returns index of the last dot in the filename or -1 if the file contains no +// dot. +fn lastdotindex(filename: []rune) int = { + let index = (len(filename) - 1): int; + for (index >= 0 && filename[index] != '.') { + if (filename[index] == '/') { + index = -1; + break; + }; + index -= 1; + }; + return index; +}; + // Input is borrowed, return value has to be freed. 
// test.js -> test.bundle.js // test.dot.js -> test.dot.bundle.js @@ -34,14 +49,7 @@ fn runes_to_str(runes: []rune) str = { fn file_name_bundled(ifile: str) str = { let slc = strings::runes(ifile); defer free(slc); - let lastdot = (len(slc) - 1): int; - for (lastdot >= 0 && slc[lastdot] != '.') { - if (slc[lastdot] == '/') { - lastdot = -1; - break; - }; - lastdot -= 1; - }; + let lastdot = lastdotindex(slc); // files without extension get the .bundle at the end if (lastdot == -1) lastdot = len(slc): int; @@ -57,28 +65,135 @@ fn file_name_bundled(ifile: str) str = { return runes_to_str(slc); }; +type filetype = enum { + HTML, + JS, + CSS, + BINARY, + UNKNOWN, +}; + +// Bundles all from input linked file and echos the bundle to the output stream. +// ifile: resolved path +fn tacker_write(ifile: str, ofile: io::handle, ft: filetype) void = { + // const data = match (os::open(ifile, fs::flags::RDONLY)) { + // case let data: io::file => + // yield data: io::handle; + // case let data: fs::error => + // fmt::fatalf("file \"{}\" does not exist.", ifile); + // }; + if (ft == filetype::UNKNOWN) { + let slc = strings::runes(ifile); + defer free(slc); + let extstart = lastdotindex(slc); + if (extstart == -1) + fmt::fatalf("file \"{}\" has broken filetype.", ifile); + let ext = runes_to_str(slc[(extstart + 1)..]); + defer free(ext); + static const knownft = [ + ("html", filetype::HTML), + ("js", filetype::JS), + ("css", filetype::CSS), + ]; + for (let i = 0z; i < len(knownft); i += 1) { + if (knownft[i].0 == ext) ft = knownft[i].1; + }; + }; + // TODO + fmt::println(switch (ft) { + case filetype::HTML => yield "html"; + case filetype::JS => yield "js"; + case filetype::CSS => yield "css"; + case filetype::BINARY => yield "bin"; + case filetype::UNKNOWN => yield "unknown"; + })!; +}; + +// All bundled files must be within this directory so that malicious modules +// cannot require arbitrary files on the file system. 
+let basepath: str = ""; +@fini fn fini() void = free(basepath); + +// Cuts a string to the last "/". +// Return value is borrowed from the input. +fn parent_dir(path: str) str = { + const bytes = strings::toutf8(path); + let i = len(bytes) - 1; + for (bytes[i] != '/') i -= 1; + return strings::fromutf8(bytes[..(i+1)]); +}; + +// Applys os::realpath and os::resolve. +fn realpath_resolve(path: str) str = { + const p = match (os::realpath(path)) { + case let p: str => yield p; + case let p: fs::error => + fmt::fatalf("path \"{}\" does not exist.", path); + }; + return os::resolve(p); +}; + +// path: to be resolved +// from: path to the file (or directory) where the reference was found. +// Return value has to be freed. +fn resolve_path(path: str, from: str) str = { + // directory path is relativ to + // ends with "/" + const base = if (strings::hasprefix(path, "./") || + strings::hasprefix(path, "../")) { + yield parent_dir(from); + } else { + yield basepath; + }; + const r = strings::join("", base, path); + defer free(r); + const r = strings::dup(realpath_resolve(r)); + if (!strings::hasprefix(r, basepath)) + fmt::fatalf("file path \"{}\" violates the base path \"{}\".", + r, basepath); + return r; +}; + export fn main() void = { const cmd = getopt::parse(os::args, "simple web bundler", ('f', "formats", "file formats to inline (comma seperated)"), + ('p', "basepath", "for resolving modules (defaults to cwd)"), "input-file", - "output-file", + "[output-file]", ); defer getopt::finish(&cmd); const alen = len(cmd.args); if (alen == 0) - fmt::fatal("At least the input file is as argument needed."); - if (alen > 2) fmt::fatal("Too many arguments passed."); - const ifile = cmd.args[0]; - const ofile = if (alen == 1) { - // generate output file name from input file name - yield file_name_bundled(ifile); - } else { - yield strings::dup(cmd.args[1]); + fmt::fatal("at least the input file is as argument needed."); + if (alen > 2) fmt::fatal("too many arguments passed."); + + 
basepath = strings::join("", os::getcwd(), "/"); + for (let i = 0z; i < len(cmd.opts); i += 1) { + if (cmd.opts[i].0 == 'p') { + free(basepath); + basepath = strings::join("", + realpath_resolve(cmd.opts[i].1), "/"); + if (basepath == "//") basepath = strings::fromutf8(strings::toutf8(basepath)[..1]); + }; }; + + const ifile = cmd.args[0]; + const ofile = if (alen == 1) file_name_bundled(ifile) + else strings::dup(cmd.args[1]); defer free(ofile); - fmt::println(ifile)!; - fmt::println(ofile)!; + const ofile = if (ofile == "-") os::stdout + else os::create(ofile, fs::mode::USER_RW | fs::mode::GROUP_R | + fs::mode::OTHER_R, fs::flags::WRONLY, fs::flags::TRUNC)! + : io::handle; + defer io::close(ofile)!; + + const ifile = strings::join("", "./", ifile); + defer free(ifile); + const defaultfrom = strings::join("", os::getcwd(), "/"); + defer free(defaultfrom); + tacker_write(resolve_path(ifile, defaultfrom), ofile, + filetype::UNKNOWN); };