commit 739eca230112f7a88b625f6b72b9a99f40274384
parent faeacbedade2cb621e276ae2cc6c1358aea06d33
Author: tongong <tongong@gmx.net>
Date: Tue, 28 Jun 2022 21:36:20 +0200
finished html->js bundling
Diffstat:
7 files changed, 193 insertions(+), 16 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,6 +1,7 @@
PREFIX=/usr/local
-tacker: *.ha
+SRC = *.ha searchio/*.ha
+tacker: $(SRC)
hare build -o tacker
test:
diff --git a/bundle_binary.ha b/bundle_binary.ha
@@ -2,10 +2,10 @@ use encoding::base64;
use io;
use os;
-fn tacker_binary(ifile: str, ofile: io::handle) void = {
- const enc = base64::newencoder(&base64::url_encoding, ofile);
- const ifile = os::open(ifile)!;
+// Streams the file at inputpath into ofile, base64-encoded with the
+// url_encoding alphabet. Nothing is returned; a failure to open, copy or close
+// aborts the program through the error assertions (!).
+// NOTE(review): if the output is meant for data: URIs, confirm that the
+// url-safe alphabet is what consumers expect.
+fn tacker_binary(inputpath: str, ofile: io::handle) void = {
+ const ifile = os::open(inputpath)!;
defer io::close(ifile)!;
+ // The encoder wraps ofile: bytes written to it come out encoded on ofile.
+ const enc = base64::newencoder(&base64::url_encoding, ofile);
io::copy(&enc, ifile)!;
+ // The encoder is closed explicitly before returning; presumably this is what
+ // emits any buffered trailing bytes -- TODO confirm against encoding::base64.
io::close(&enc)!;
};
diff --git a/bundle_css.ha b/bundle_css.ha
@@ -1,6 +1,14 @@
use fmt;
use io;
+use os;
-fn tacker_css(ifile: str, ofile: io::handle) void = {
- fmt::fprint(ofile, "css is not implemented yet")!;
+// Writes the css file at inputpath to ofile. For now the content is passed
+// through unchanged; failures to open, read or write abort the program
+// through the error assertions (!).
+fn tacker_css(inputpath: str, ofile: io::handle) void = {
+	const ifile = os::open(inputpath)!;
+	defer io::close(ifile)!;
+	// TODO: real css bundling. Until then copy the stream in one call instead
+	// of one read/write pair per byte; unlike a bare EOF check this also
+	// surfaces read errors instead of writing a stale byte.
+	io::copy(ofile, ifile)!;
};
diff --git a/bundle_html.ha b/bundle_html.ha
@@ -1,15 +1,163 @@
+use bufio;
use fmt;
use io;
use os;
use searchio;
+use strings;
+
+// https://html.spec.whatwg.org/multipage/syntax.html
// inputs are borrowed
-fn tacker_html(ifile: str, ofile: io::handle) void = {
- const ifile = os::open(ifile)!;
- const p = searchio::compile([">", "<head", "<body", "<script", "test", "src"]);
+// Bundles the html document at inputpath into ofile: the document is copied
+// through, html comments are passed along untouched, and every
+// <script src="..."> is replaced by an inline <script> holding the bundled
+// javascript of the referenced file. Other embeddable tags are recognised but
+// not implemented yet and abort with a fatal error.
+// Failures to open, read or write abort through the error assertions (!).
+fn tacker_html(inputpath: str, ofile: io::handle) void = {
+	const ifile = os::open(inputpath)!;
+	defer io::close(ifile)!;
+	// The position of a pattern in this list is the value reported by
+	// searchio::search (0 = comment, 1 = script, ...). Every entry starts with
+	// '<' so that only tags match, never plain text that merely contains a
+	// tag name such as "img" or "source".
+	const p_main = searchio::compile(["<!--", "<script", "<link",
+		"<audio", "<embed", "<img", "<source", "<track", "<video"]);
+	const p_comment = searchio::compile(["-->"]);
+	const p_tagclose = searchio::compile([">"]);
+	const p_scriptclose = searchio::compile(["</script>"]);
+	defer searchio::finish(p_main);
+	defer searchio::finish(p_comment);
+	defer searchio::finish(p_tagclose);
+	defer searchio::finish(p_scriptclose);
+
for (true) {
- if (searchio::search(ifile, ofile, p) is size) {
- fmt::print("[replaced]")!;
+		const m = searchio::search(ifile, ofile, p_main);
+		if (m is size) {
+			const m = m: size;
+			if (m == 0) { // html comments
+				// search consumes the matched text without forwarding
+				// it, so the delimiters are written back by hand
+				fmt::fprint(ofile, "<!--")!;
+				searchio::search(ifile, ofile, p_comment);
+				fmt::fprint(ofile, "-->")!;
+			} else if (m == 1) { // <script>
+				// collect the whole opening tag to inspect its attributes
+				let tagbuf = bufio::dynamic(io::mode::RDWR);
+				defer io::close(&tagbuf)!;
+				fmt::fprint(&tagbuf, "<script")!;
+				searchio::search(ifile, &tagbuf, p_tagclose);
+				fmt::fprint(&tagbuf, ">")!;
+				const src = tag_get_attr(tagbuf.buf,
+					strings::toutf8("src"));
+				if (src is not_found) {
+					// Inline script: keep the tag and copy its body
+					// verbatim up to the closing tag. Script content is
+					// raw text, so it must not be scanned for tags.
+					io::write(ofile, tagbuf.buf)!;
+					if (searchio::search(ifile, ofile,
+							p_scriptclose) is size) {
+						fmt::fprint(ofile, "</script>")!;
+					};
+				} else {
+					const src = src: tag_split;
+					// src borrows from tagbuf.buf. Turn it into the
+					// separately allocated path (freed below) BEFORE
+					// tagbuf is written to again, because growing
+					// the buffer may move it and leave src dangling.
+					const path = resolve_path(
+						strings::fromutf8(src.1), inputpath);
+					defer free(path);
+					// drop whatever sits between the tags; tagbuf is
+					// only used as a sink here
+					searchio::search(ifile, &tagbuf,
+						p_scriptclose);
+					fmt::fprint(ofile, "<script>\n")!;
+					tacker_js(path, ofile);
+					fmt::fprint(ofile, "</script>")!;
+				};
+			} else {
+				// TODO other embeds
+				fmt::fatal("[TODO] unimplemented html tag transform");
+			};
} else break;
};
};
+
+
+// Returned by tag_get_attr when the tag has no attribute with the requested
+// name (or the attribute has no value).
+type not_found = void;
+
+// A tag buffer split at the borders of an attribute value: everything before
+// the value (including an opening quote), the value itself without quotes,
+// and everything after it (including a closing quote). All three slices
+// borrow from the buffer that was searched.
+type tag_split = ([]u8, []u8, []u8);
+
+// Quoting state of the scanner in tag_get_attr.
+type quotes = enum {
+	NORMAL,
+	SINGLE,
+	DOUBLE
+};
+
+// Search an attribute inside an html tag.
+// Return the input buffer split at the borders of the value of the found
+// attribute, or not_found. The attribute name is compared
+// case-insensitively. Values may be single-quoted, double-quoted or unquoted;
+// an unquoted value ends at whitespace, at '>' or at the end of the buffer,
+// and an unterminated quoted value ends at the end of the buffer. The
+// function never indexes outside of tag.
+fn tag_get_attr(tag: []u8, attr: []u8) (tag_split | not_found) = {
+	// STRATEGY
+	// go through the string and find all '=' that are not inside quotes
+	// find the corresponding attribute name and if it is fitting find the
+	// corresponding value
+	let state = quotes::NORMAL;
+	for (let i = 0z; i < len(tag); i += 1) {
+		switch (state) {
+		case quotes::NORMAL => {
+			if (tag[i] == '"') state = quotes::DOUBLE;
+			if (tag[i] == '\'') state = quotes::SINGLE;
+			if (tag[i] == '=') {
+				// The name is the run of non-space bytes left of
+				// the '=', ignoring whitespace in between. Both
+				// scans stop at index 0 instead of wrapping around.
+				let name_end = i;
+				for (name_end > 0 && isspace(tag[name_end - 1]))
+					name_end -= 1;
+				let name_start = name_end;
+				for (name_start > 0
+						&& !isspace(tag[name_start - 1]))
+					name_start -= 1;
+				if (cmpcase(attr, tag[name_start..name_end])) {
+					let val_start = i + 1;
+					for (val_start < len(tag)
+							&& isspace(tag[val_start]))
+						val_start += 1;
+					let val_end = val_start;
+					if (val_start < len(tag)
+							&& (tag[val_start] == '\''
+							|| tag[val_start] == '"')) {
+						// quoted: runs up to the matching quote
+						const quote = tag[val_start];
+						val_start += 1;
+						val_end = val_start;
+						for (val_end < len(tag)
+								&& tag[val_end] != quote)
+							val_end += 1;
+					} else {
+						// unquoted: '>' closes the tag and is
+						// not part of the value
+						for (val_end < len(tag)
+								&& !isspace(tag[val_end])
+								&& tag[val_end] != '>')
+							val_end += 1;
+					};
+					return (tag[..val_start],
+						tag[val_start..val_end],
+						tag[val_end..]);
+				};
+			};
+		};
+		case quotes::SINGLE => {
+			if (tag[i] == '\'') state = quotes::NORMAL;
+		};
+		case quotes::DOUBLE => {
+			if (tag[i] == '"') state = quotes::NORMAL;
+		};
+		};
+	};
+	return not_found;
+};
+
+
+// Checks that the src attribute is found case-insensitively, that '=' signs
+// and quotes inside other quoted values are skipped, and that the tag is
+// split exactly around the unquoted value text.
+@test fn tag_get_attr() void = {
+	const tag = "<script attr1 attr2 = test attr3= 'asdasdf = \"asdfasdf' sRc=\"filename\">";
+	const found = tag_get_attr(strings::toutf8(tag),
+		strings::toutf8("src"));
+	// aborts the test when the attribute was not found
+	const parts = found as tag_split;
+	const before = strings::fromutf8(parts.0);
+	const value = strings::fromutf8(parts.1);
+	const after = strings::fromutf8(parts.2);
+	assert(before == "<script attr1 attr2 = test attr3= 'asdasdf = \"asdfasdf' sRc=\"");
+	assert(value == "filename");
+	assert(after == "\">");
+};
+
+
+// Reports whether c is one of the five whitespace bytes used here: space,
+// tab, line feed, form feed or carriage return. The check is byte-wise; all
+// of these are ASCII, so bytes belonging to multi-byte UTF-8 sequences never
+// match and no dedicated text decoding is needed.
+fn isspace(c: u8) bool = {
+	const blanks: [_]u8 = [' ', '\t', '\n', '\f', '\r'];
+	for (let k = 0z; k < len(blanks); k += 1) {
+		if (blanks[k] == c) return true;
+	};
+	return false;
+};
+
+
+// Maps the ASCII letters 'A'..'Z' to 'a'..'z'; every other byte is returned
+// unchanged.
+fn tolower(c: u8) u8 = {
+	// anything outside the upper-case range passes through
+	if (c < 'A' || c > 'Z') return c;
+	return c - 'A' + 'a';
+};
+
+
+// Compares two byte buffers while ignoring the case of ASCII letters.
+// Returns true only if both have the same length and every pair of bytes is
+// equal after lower-casing.
+fn cmpcase(a: []u8, b: []u8) bool = {
+	if (len(a) != len(b)) return false;
+	// advance over the common prefix; equal buffers consume everything
+	let same = 0z;
+	for (same < len(a) && tolower(a[same]) == tolower(b[same]))
+		same += 1;
+	return same == len(a);
+};
diff --git a/bundle_js.ha b/bundle_js.ha
@@ -1,6 +1,14 @@
use fmt;
use io;
+use os;
-fn tacker_js(ifile: str, ofile: io::handle) void = {
- fmt::fprintln(ofile, "js not implemented yet.")!;
+// Writes the javascript file at inputpath to ofile. For now the content is
+// passed through unchanged; failures to open, read or write abort the program
+// through the error assertions (!).
+fn tacker_js(inputpath: str, ofile: io::handle) void = {
+	const ifile = os::open(inputpath)!;
+	defer io::close(ifile)!;
+	// TODO: real js bundling. Until then copy the stream in one call instead
+	// of one read/write pair per byte; unlike a bare EOF check this also
+	// surfaces read errors instead of writing a stale byte.
+	io::copy(ofile, ifile)!;
};
diff --git a/searchio/searchio.ha b/searchio/searchio.ha
@@ -27,12 +27,12 @@ fn patternelem_cmp(a: *void, b: *void) int = {
return 0; // will never be reached
};
-// has to be freed TODO custom function for free
+// pattern has to be finished
export fn compile(s: []str) pattern = {
let p: pattern = [];
for (let i = 0z; i < len(s); i += 1) {
append(p, patternelem {
- data = strings::toutf8(s[i]),
+ data = strings::toutf8(strings::dup(s[i])),
index = i,
});
};
@@ -49,6 +49,12 @@ export fn compile(s: []str) pattern = {
return p;
};
+// Releases everything owned by a pattern returned from compile: the
+// duplicated bytes of every search string and the pattern slice itself.
+// The pattern must not be used afterwards.
+export fn finish(p: pattern) void = {
+	for (let i = 0z; i < len(p); i += 1) {
+		free(p[i].data);
+	};
+	// compile grows the pattern with append, so the slice is a heap
+	// allocation of its own and has to be freed as well
+	free(p);
+};
+
// reads until one of the end strings is read and pipes all read bytes to ofile
// (not the matched end itself)
// does not work if the end of one pattern is the start of another
@@ -126,4 +132,5 @@ export fn search(ifile: io::handle, ofile: io::handle, end: pattern)
} else break;
};
assert("inputstream word2 ter" == strings::fromutf8(*outbuf));
+ finish(p);
};
diff --git a/test/index.html b/test/index.html
@@ -2,7 +2,12 @@
<html>
<head>
<!-- asdf < test -->
- <script src="./a.js"></script>
+ <!-- <script src="./a.js"></script> -->
+ <script attribute=value a2 = 'val2src="wrong"' src="./a.js"></script>
+ <script a3 = "asdf">
+ console.log("test");
+ let end = "</script";
+ </script>
</head>
<body>