// SPDX-License-Identifier: 0BSD
// Reduced from https://git.sr.ht/~nabijaczleweli/voreutils/tree/5a9f9f296607494cf3fed7b83d9cef6945e57d28/item/cmd/wc.cpp
// https://101010.pl/@nabijaczleweli/115774904189342717
//
// c++           -g -O3 -Wall -Wextra -fPIC -std=c++20 -fno-exceptions -fno-rtti wcl.cpp -o wcl.clang.memchr
// c++ -DSTDCOUNT -g -O3 -Wall -Wextra -fPIC -std=c++20 -fno-exceptions -fno-rtti wcl.cpp -o wcl.clang.count
// g++           -g -O3 -Wall -Wextra -fPIC -std=c++20 -fno-exceptions -fno-rtti wcl.cpp -o wcl.gcc.memchr
// g++ -DSTDCOUNT -g -O3 -Wall -Wextra -fPIC -std=c++20 -fno-exceptions -fno-rtti wcl.cpp -o wcl.gcc.count
//
// ./wcl.gcc.memchr r2 ran
//   1.01 ± 0.06 times faster than ./wcl.clang.memchr r2
//   2.48 ± 0.14 times faster than ./wcl.clang.count r2
//   2.51 ± 0.12 times faster than ./wcl.gcc.count r2

#include <algorithm>
#include <cerrno>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <utility>

#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>


/// Running totals for one input file.
struct record {
	std::uint64_t bytes, newlines;
};


/// Count the '\n' bytes in buf[0, len).
///
/// Built with -DSTDCOUNT this uses std::count; otherwise it calls std::memchr repeatedly,
/// restarting one byte past each hit.
static std::uint64_t count_newlines(const char * buf, std::size_t len) {
#if STDCOUNT
	return std::count(buf, buf + len, '\n');
#else
	// This is still suboptimal: every newline costs a fresh memchr() call, so a short inline
	//   while(cur != end && *cur == '\n') ++found, ++cur;
	// after each hit would be better if the data has consecutive needles.
	std::uint64_t found = 0;
	const char * cur    = buf;
	while(len) {
		const auto hit = static_cast<const char *>(std::memchr(cur, '\n', len));
		if(!hit)
			break;

		++found;
		const char * const next = hit + 1;  // resume just past the newline
		len -= static_cast<std::size_t>(next - cur);
		cur = next;
	}
	return found;
#endif
}


/// Count the newlines and bytes of the file named by argv[1] and print them as
/// "newlines bytes file" on standard output.
///
/// Returns 0 on success and 1 if the file could not be opened or read,
/// or if no file operand was given.
int main(int argc, const char * const * argv) {
	// argv[0] may legitimately be absent; fall back to a fixed name for diagnostics.
	const char * const self = (argc > 0 && argv[0]) ? argv[0] : "wcl";
	if(argc < 2) {
		// Without this guard a null argv[1] would be handed to open() and to "%s".
		std::fprintf(stderr, "usage: %s file\n", self);
		return 1;
	}

	bool err{};

	// Report the current errno for `file` on standard error and remember the failure.
	auto fail = [&](const char * file) {
		std::fprintf(stderr, "%s: %s: %s\n", self, file, std::strerror(errno));
		err = true;
	};

	// Print the columns (newlines, then bytes), each right-aligned to at least 6 characters and
	// separated by single spaces, followed by " file" if file is non-null, then a newline.
	auto dump = [&](const record & data, const char * file) {
		auto first = true;
		for(auto && column : std::initializer_list<std::uint64_t>{data.newlines, data.bytes}) {
			if(!std::exchange(first, false))
				std::fputc(' ', stdout);
			std::fprintf(stdout, "%6" PRIu64 "", column);
		}
		if(file) {
			std::fputc(' ', stdout);
			std::fputs(file, stdout);
		}
		std::fputc('\n', stdout);
	};

	// Read `file` to end-of-file in 64KiB chunks, accumulating totals; print them on success.
	auto process = [&](const char * file) {
		const int fd = open(file, O_RDONLY | O_CLOEXEC);
		if(fd == -1) {
			fail(file);
			return;
		}
		// Purely advisory, so the result is deliberately ignored.
		(void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);

		record acc{};
		char buf[64 * 1024];
		bool ok = true;
		for(;;) {
			ssize_t rd;
			while((rd = read(fd, buf, sizeof(buf))) == -1 && errno == EINTR)  // retry interrupted reads
				;
			if(rd == -1) {
				fail(file);  // report before close() gets a chance to overwrite errno
				ok = false;
				break;
			}
			if(!rd)  // end of file
				break;

			acc.newlines += count_newlines(buf, static_cast<std::size_t>(rd));
			acc.bytes += rd;
		}

		// The descriptor was previously leaked on every path out of this lambda.
		close(fd);

		if(ok)
			dump(acc, file);
	};

	process(argv[1]);
	return err;
}