searchio.ha (3987B)
1 use bufio; 2 use io; 3 use os; 4 use sort; 5 use strings; 6 7 export type pattern = struct { 8 elems: []patternelem, // sorted 9 // reference to the string list before compile 10 original: []str, 11 }; 12 export type patternelem = struct { 13 // if the first n bytes are identical to the first n bytes in the string 14 // before they are set to 0 15 data: []u8, 16 // index into the string list before compile 17 index: size, 18 }; 19 20 fn patternelem_cmp(a: *void, b: *void) int = { 21 // the end of one of the strings will never be reached because than it 22 // would be a substring of the other string which make the other string 23 // impossible to find 24 const a: []u8 = (*(a: *patternelem)).data; 25 const b: []u8 = (*(b: *patternelem)).data; 26 for (let i = 0z; true; i += 1) { 27 if (a[i] < b[i]) return -1; 28 if (a[i] > b[i]) return 1; 29 }; 30 return 0; // will never be reached 31 }; 32 33 // pattern has to be finished 34 export fn compile(s: []str) pattern = { 35 let p: []patternelem = []; 36 for (let i = 0z; i < len(s); i += 1) { 37 append(p, patternelem { 38 data = strings::toutf8(strings::dup(s[i])), 39 index = i, 40 }); 41 }; 42 sort::sort(p: []void, size(patternelem), &patternelem_cmp: 43 *sort::cmpfunc); 44 for (let i = len(p) - 1; i >= 1; i -= 1) { 45 for (let j = 0z; j < len(p[i].data) && j < len(p[i-1].data); 46 j += 1) { 47 if (p[i].data[j] == p[i-1].data[j]) { 48 p[i].data[j] = 0; 49 } else break; 50 }; 51 }; 52 return pattern { elems = p, original = strings::dupall(s) }; 53 }; 54 55 export fn finish(p: pattern) void = { 56 for (let i = 0z; i < len(p.elems); i += 1) { 57 free(p.elems[i].data); 58 }; 59 strings::freeall(p.original); 60 }; 61 62 // reads until one of the end strings is read and pipes all read bytes to ofile 63 // (not the matched end itself) 64 // does not work if the end of one pattern is the start of another 65 // returns pattern index (index into string list before compile) 66 export fn search(ifile: io::handle, ofile: io::handle, end: pattern) 67 (size | io::EOF) = { 68 // element in pattern array that is currently being matched 69 // -1 -> none of them 70 let curr_elem = 0z; 71 // index into the matched element that is checked next 72 let curr_index = 0z; 73 // if an element is matched to a certain point but then a byte is wrong 74 // this byte is stored here to maybe start a new match 75 let leftover: u8 = 0; 76 for (true) { 77 let buf: [1]u8 = [' ']; 78 if (leftover != 0) { 79 buf[0] = leftover; 80 leftover = 0; 81 } else if (io::read(ifile, buf) is io::EOF) return io::EOF; 82 const buf = buf[0]; 83 84 let nomatches = true; 85 for (let i = curr_elem; i < len(end.elems); i += 1) { 86 const e = end.elems[i].data; 87 if (curr_index != 0 && i != curr_elem && 88 e[curr_index - 1] != 0) break; 89 if (e[curr_index] != 0 && e[curr_index] > buf) break; 90 if (buf == e[curr_index]) { 91 curr_elem = i; 92 curr_index += 1; 93 nomatches = false; 94 if (curr_index == len(end.elems[curr_elem] 95 .data)) 96 return end.elems[curr_elem].index; 97 break; 98 }; 99 }; 100 if (nomatches) { 101 if (curr_index != 0) { 102 for (let i = 0z; i < curr_index; i += 1) { 103 let elem = curr_elem; 104 for (end.elems[elem].data[i] == 0) { 105 elem -= 1; 106 }; 107 io::writeall(ofile, [end.elems[elem] 108 .data[i]])!; 109 }; 110 curr_elem = 0; 111 curr_index = 0; 112 leftover = buf; 113 } else { 114 io::writeall(ofile, [buf])!; 115 }; 116 }; 117 }; 118 return io::EOF; // unreachable 119 }; 120 121 @test fn searchio() void = { 122 const input = "inputstreamtesttest word2word1 tertemp"; 123 const input = &bufio::fixed(strings::toutf8(input), io::mode::READ); 124 const output = bufio::dynamic(io::mode::WRITE); 125 const outbuf = &output.buf; 126 const output = &output; 127 defer io::close(output)!; 128 129 const p = compile(["temp", "test", "word1"]); 130 const matches: [_]size = [1, 1, 2, 0]; 131 let matchesindex = 0; 132 for (true) { 133 const m = search(input, output, p); 134 if (m is size) { 135 const m = m: size; 136 assert(matches[matchesindex] == m); 137 matchesindex += 1; 138 } else break; 139 }; 140 assert(matchesindex == 4); 141 assert("inputstream word2 ter" == strings::fromutf8(*outbuf)); 142 finish(p); 143 };