tacker

a simple web bundler
git clone https://tongong.net/git/tacker.git
Log | Files | Refs | README

searchio.ha (3987B)


      1 use bufio;
      2 use io;
      3 use os;
      4 use sort;
      5 use strings;
      6 
      7 export type pattern = struct {
      8 	elems: []patternelem, // sorted
      9 	// reference to the string list before compile
     10 	original: []str,
     11 };
     12 export type patternelem = struct {
     13 	// if the first n bytes are identical to the first n bytes in the string
     14 	// before they are set to 0
     15 	data: []u8,
     16 	// index into the string list before compile
     17 	index: size,
     18 };
     19 
     20 fn patternelem_cmp(a: *void, b: *void) int = {
     21 	// the end of one of the strings will never be reached because than it
     22 	// would be a substring of the other string which make the other string
     23 	// impossible to find
     24 	const a: []u8 = (*(a: *patternelem)).data;
     25 	const b: []u8 = (*(b: *patternelem)).data;
     26 	for (let i = 0z; true; i += 1) {
     27 		if (a[i] < b[i]) return -1;
     28 		if (a[i] > b[i]) return 1;
     29 	};
     30 	return 0; // will never be reached
     31 };
     32 
     33 // pattern has to be finished
     34 export fn compile(s: []str) pattern = {
     35 	let p: []patternelem = [];
     36 	for (let i = 0z; i < len(s); i += 1) {
     37 		append(p, patternelem {
     38 			data = strings::toutf8(strings::dup(s[i])),
     39 			index = i,
     40 		});
     41 	};
     42 	sort::sort(p: []void, size(patternelem), &patternelem_cmp:
     43 		*sort::cmpfunc);
     44 	for (let i = len(p) - 1; i >= 1; i -= 1) {
     45 		for (let j = 0z; j < len(p[i].data) && j < len(p[i-1].data);
     46 				j += 1) {
     47 			if (p[i].data[j] == p[i-1].data[j]) {
     48 				p[i].data[j] = 0;
     49 			} else break;
     50 		};
     51 	};
     52 	return pattern { elems = p, original = strings::dupall(s) };
     53 };
     54 
     55 export fn finish(p: pattern) void = {
     56 	for (let i = 0z; i < len(p.elems); i += 1) {
     57 		free(p.elems[i].data);
     58 	};
     59 	strings::freeall(p.original);
     60 };
     61 
     62 // reads until one of the end strings is read and pipes all read bytes to ofile
     63 // (not the matched end itself)
     64 // does not work if the end of one pattern is the start of another
     65 // returns pattern index (index into string list before compile)
     66 export fn search(ifile: io::handle, ofile: io::handle, end: pattern)
     67 		(size | io::EOF) = {
     68 	// element in pattern array that is currently being matched
     69 	// -1 -> none of them
     70 	let curr_elem = 0z;
     71 	// index into the matched element that is checked next
     72 	let curr_index = 0z;
     73 	// if an element is matched to a certain point but then a byte is wrong
     74 	// this byte is stored here to maybe start a new match
     75 	let leftover: u8 = 0;
     76 	for (true) {
     77 		let buf: [1]u8 = [' '];
     78 		if (leftover != 0) {
     79 			buf[0] = leftover;
     80 			leftover = 0;
     81 		} else if (io::read(ifile, buf) is io::EOF) return io::EOF;
     82 		const buf = buf[0];
     83 
     84 		let nomatches = true;
     85 		for (let i = curr_elem; i < len(end.elems); i += 1) {
     86 			const e = end.elems[i].data;
     87 			if (curr_index != 0 && i != curr_elem &&
     88 				e[curr_index - 1] != 0) break;
     89 			if (e[curr_index] != 0 && e[curr_index] > buf) break;
     90 			if (buf == e[curr_index]) {
     91 				curr_elem = i;
     92 				curr_index += 1;
     93 				nomatches = false;
     94 				if (curr_index == len(end.elems[curr_elem]
     95 						.data))
     96 					return end.elems[curr_elem].index;
     97 				break;
     98 			};
     99 		};
    100 		if (nomatches) {
    101 			if (curr_index != 0) {
    102 				for (let i = 0z; i < curr_index; i += 1) {
    103 					let elem = curr_elem;
    104 					for (end.elems[elem].data[i] == 0) {
    105 						elem -= 1;
    106 					};
    107 					io::writeall(ofile, [end.elems[elem]
    108 						.data[i]])!;
    109 				};
    110 				curr_elem = 0;
    111 				curr_index = 0;
    112 				leftover = buf;
    113 			} else {
    114 				io::writeall(ofile, [buf])!;
    115 			};
    116 		};
    117 	};
    118 	return io::EOF; // unreachable
    119 };
    120 
    121 @test fn searchio() void = {
    122 	const input = "inputstreamtesttest word2word1 tertemp";
    123 	const input = &bufio::fixed(strings::toutf8(input), io::mode::READ);
    124 	const output = bufio::dynamic(io::mode::WRITE);
    125 	const outbuf = &output.buf;
    126 	const output = &output;
    127 	defer io::close(output)!;
    128 
    129 	const p = compile(["temp", "test", "word1"]);
    130 	const matches: [_]size = [1, 1, 2, 0];
    131 	let matchesindex = 0;
    132 	for (true) {
    133 		const m = search(input, output, p);
    134 		if (m is size) {
    135 			const m = m: size;
    136 			assert(matches[matchesindex] == m);
    137 			matchesindex += 1;
    138 		} else break;
    139 	};
    140 	assert(matchesindex == 4);
    141 	assert("inputstream word2 ter" == strings::fromutf8(*outbuf));
    142 	finish(p);
    143 };