tacker

a simple web bundler
git clone https://tongong.net/git/tacker.git
Log | Files | Refs | README

commit 5c745a502396955c68f7b7aad70a81b169cf883b
parent 13df74dd541c89c3cdc407ff493f1016b22bfd10
Author: tongong <tongong@gmx.net>
Date:   Sun, 19 Jun 2022 16:21:30 +0200

added searchio module

Diffstat:
M README.md | 7 +++++++
M bundle_html.ha | 12 +++++++++++-
M bundle_js.ha | 3 ++-
D path-helpers.ha | 49 -------------------------------------------------
A path_helpers.ha | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
A searchio/searchio.ha | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R string-helpers.ha -> string_helpers.ha | 0
A test/index.html | 10 ++++++++++
8 files changed, 216 insertions(+), 51 deletions(-)

diff --git a/README.md b/README.md @@ -44,3 +44,10 @@ The "conceptual module name space root" is the working directory. This means that required paths which are not relative are resolved from the cwd. For security reasons only files in the cwd can be bundled. This can be changed with the `-p` option. Input and output file name stay relative to the cwd. + +`tacker` does not aim to be 100% spec-compliant. The goal is to work in all +common scenarios without laying to much emphasis on obscure edge cases. It is a +tacker after all - not an industrial robot. Though unlike a real-world tacker +your security should not be at hazard. In the case of javascript malicious +source files can obviously take over your bundle but they should never take +over your system. diff --git a/bundle_html.ha b/bundle_html.ha @@ -1,5 +1,15 @@ +use fmt; use io; +use os; +use searchio; +// Opens the file at path ifile and runs it through searchio::search with ofile as the output handle, calling fmt::print("[replaced]") after every match; stops once search no longer returns a size. Inputs are borrowed. NOTE(review): the opened handle and the compiled pattern are not released in this function. fn tacker_html(ifile: str, ofile: io::handle) void = { - 0; + const ifile = os::open(ifile)!; + const p = searchio::compile([">", "<head", "<body", "<script", "test", "src"]); + for (true) { + if (searchio::search(ifile, ofile, p) is size) { + fmt::print("[replaced]")!; + } else break; + }; }; diff --git a/bundle_js.ha b/bundle_js.ha @@ -1,5 +1,6 @@ +use fmt; use io; fn tacker_js(ifile: str, ofile: io::handle) void = { - 0; + fmt::fprintln(ofile, "js not implemented yet.")!; }; diff --git a/path-helpers.ha b/path-helpers.ha @@ -1,49 +0,0 @@ -use fmt; -use fs; -use os; -use strings; - -// All bundled files must be within this directory so that malicious modules -// cannot require arbitrary files on the file system. -let basepath: str = ""; -@fini fn fini() void = free(basepath); - -// Cuts a string to the last "/". -// Return value is borrowed from the input. -fn parent_dir(path: str) str = { - const bytes = strings::toutf8(path); - let i = len(bytes) - 1; - for (bytes[i] != '/') i -= 1; - return strings::fromutf8(bytes[..(i+1)]); -}; - -// Applys os::realpath and os::resolve. 
-fn realpath_resolve(path: str) str = { - const p = match (os::realpath(path)) { - case let p: str => yield p; - case let p: fs::error => - fmt::fatalf("path \"{}\" does not exist.", path); - }; - return os::resolve(p); -}; - -// path: to be resolved -// from: path to the file (or directory) where the reference was found. -// Return value has to be freed. -fn resolve_path(path: str, from: str) str = { - // directory path is relativ to - // ends with "/" - const base = if (strings::hasprefix(path, "./") || - strings::hasprefix(path, "../")) { - yield parent_dir(from); - } else { - yield basepath; - }; - const r = strings::join("", base, path); - defer free(r); - const r = strings::dup(realpath_resolve(r)); - if (!strings::hasprefix(r, basepath)) - fmt::fatalf("file path \"{}\" violates the base path \"{}\".", - r, basepath); - return r; -}; diff --git a/path_helpers.ha b/path_helpers.ha @@ -0,0 +1,49 @@ +use fmt; +use fs; +use os; +use strings; + +// All bundled files must be within this directory so that malicious modules +// cannot require arbitrary files on the file system. +let basepath: str = ""; +@fini fn fini() void = free(basepath); + +// Cuts a string to the last "/" (the "/" itself is kept). NOTE(review): the backwards scan has no lower bound, so path presumably must contain a "/" - confirm. +// Return value is borrowed from the input. +fn parent_dir(path: str) str = { + const bytes = strings::toutf8(path); + let i = len(bytes) - 1; + for (bytes[i] != '/') i -= 1; + return strings::fromutf8(bytes[..(i+1)]); +}; + +// Applies os::realpath and os::resolve. Calls fmt::fatalf if os::realpath returns an fs::error. +fn realpath_resolve(path: str) str = { + const p = match (os::realpath(path)) { + case let p: str => yield p; + case let p: fs::error => + fmt::fatalf("path \"{}\" does not exist.", path); + }; + return os::resolve(p); +}; + +// path: to be resolved +// from: path to the file (or directory) where the reference was found. +// Return value has to be freed. 
+fn resolve_path(path: str, from: str) str = { + // directory path is relative to + // ends with "/" + const base = if (strings::hasprefix(path, "./") || + strings::hasprefix(path, "../")) { + yield parent_dir(from); + } else { + yield basepath; + }; + const r = strings::join("", base, path); + defer free(r); + const r = strings::dup(realpath_resolve(r)); + if (!strings::hasprefix(r, basepath)) + fmt::fatalf("file path \"{}\" violates the base path \"{}\".", + r, basepath); + return r; +}; diff --git a/searchio/searchio.ha b/searchio/searchio.ha @@ -0,0 +1,137 @@ +use fmt; +use io; +use os; +use sort; +use strings; + +// // reads until end is read and pipes all read bytes to ofile (end itself not) +// // returns true if end is reached, false on EOF +// fn read_until(ifile: io::handle, ofile: io::handle, end: str) bool = { +// let end = strings::toutf8(end); +// let currIndex = 0z; // the current index in the buffer to check +// for (true) { +// const buf: [1]u8 = [' ']; +// if (io::read(ifile, buf) is io::EOF) return false; +// if (buf[0] == end[currIndex]) { +// currIndex += 1; +// if (currIndex == len(end)) return true; +// } else { +// if (currIndex != 0) { +// io::write(ofile, end[..currIndex])!; +// currIndex = 0; +// }; +// io::write(ofile, buf)!; +// }; +// }; +// return false; // unreachable +// }; + +// list of pattern elements as returned by compile (sorted with patternelem_cmp) +export type pattern = []patternelem; +export type patternelem = struct { + // bytes of the pattern string; compile sets the leading bytes that are + // identical to the preceding (sorted) element's bytes at the same positions to 0 + data: []u8, + // index into the string list before compile + index: size, +}; + +fn patternelem_cmp(a: *void, b: *void) int = { + // the end of one of the strings will never be reached because then it + // would be a prefix of the other string, which makes the other string + // impossible to find + const a: []u8 = (*(a: *patternelem)).data; + const b: []u8 = (*(b: *patternelem)).data; + for (let i = 0z; true; i += 1) { + if (a[i] < b[i]) return -1; + if (a[i] > b[i]) 
return 1; + }; + return 0; // will never be reached +}; + +// has to be freed TODO custom function for free. NOTE(review): data is taken straight from strings::toutf8 and zeroed in place below, which presumably alters the caller's strings; an empty s makes len(p) - 1 wrap around - confirm both. +export fn compile(s: []str) pattern = { + let p: pattern = []; + for (let i = 0z; i < len(s); i += 1) { + append(p, patternelem { + data = strings::toutf8(s[i]), + index = i, + }); + }; + sort::sort(p: []void, size(patternelem), &patternelem_cmp: + *sort::cmpfunc); + for (let i = len(p) - 1; i >= 1; i -= 1) { + for (let j = 0z; j < len(p[i].data) && j < len(p[i-1].data); + j += 1) { + if (p[i].data[j] == p[i-1].data[j]) { + p[i].data[j] = 0; + } else break; + }; + }; + // for (let i = 0z; i < len(p); i+= 1) { + // fmt::print(p[i].index)!; + // for (let j = 0z; j < len(p[i].data); j+= 1) { + // fmt::print(" ")!; + // fmt::print(p[i].data[j])!; + // }; + // fmt::println()!; + // }; + return p; +}; + +// reads until one of the end strings is read and pipes all read bytes to ofile +// (not the matched end itself) +// does not work if the end of one pattern is the start of another +// returns pattern index (index into string list before compile), or io::EOF once the input is exhausted. NOTE(review): bytes of a partial match still pending at EOF appear to never reach ofile - confirm. +export fn search(ifile: io::handle, ofile: io::handle, end: pattern) + (size | io::EOF) = { + // element in pattern array that is currently being matched + // (set back to 0 when a match attempt fails) + let curr_elem = 0z; + // index into the matched element that is checked next + let curr_index = 0z; + // if an element is matched to a certain point but then a byte is wrong + // this byte is stored here to maybe start a new match (0 means: nothing stored) + let leftover: u8 = 0; + for (true) { + let buf: [1]u8 = [' ']; + if (leftover != 0) { + buf[0] = leftover; + leftover = 0; + } else if (io::read(ifile, buf) is io::EOF) return io::EOF; + const buf = buf[0]; + + let nomatches = true; + for (let i = curr_elem; i < len(end); i += 1) { + const e = end[i].data; + if (curr_index != 0 && i != curr_elem && + e[curr_index - 1] != 0) break; + if (e[curr_index] != 0 && e[curr_index] > buf) break; + if (buf == e[curr_index]) { + curr_elem = i; + curr_index += 1; + nomatches = 
false; + if (curr_index == len(end[curr_elem].data)) + return end[curr_elem].index; + break; + }; + }; + if (nomatches) { + if (curr_index != 0) { + for (let i = 0z; i < curr_index; i += 1) { + let elem = curr_elem; + for (end[elem].data[i] == 0) { + elem -= 1; + }; + io::write(ofile, [end[elem].data[i]])!; + }; + curr_elem = 0; + curr_index = 0; + leftover = buf; + } else { + io::write(ofile, [buf])!; + }; + }; + }; + return io::EOF; // unreachable +}; diff --git a/string-helpers.ha b/string_helpers.ha diff --git a/test/index.html b/test/index.html @@ -0,0 +1,10 @@ +<!DOCTYPE html> +<html> + <head> + <!-- asdf < test --> + <script src="./a.js"></script> + </head> + <body> + + </body> +</html>