From 1bc3a25505bc147731d8f1368aca86e41385a431 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sun, 27 Dec 2020 07:02:35 -0800 Subject: [PATCH] Improve documentation The Cosmo API documentation page is pretty good now https://justine.lol/cosmopolitan/documentation.html --- Makefile | 6 +- ape/lib/pic.c | 140 - build/definitions.mk | 2 - dsp/core/scalevolume.c | 2 +- dsp/tty/ident.c | 2 +- dsp/tty/ttymove.c | 2 +- dsp/tty/ttyraster.c | 2 +- examples/cplusplus.cc | 2 +- examples/lstime.c | 2 +- examples/nesemu1.cc | 2 +- libc/alg/qsort.c | 8 +- libc/alg/replacestr.c | 2 +- libc/alg/replacestr16.c | 2 +- libc/bits/bits.h | 2 +- libc/bits/emptytonull.c | 2 +- libc/bits/mmintrin.internal.h | 14 - libc/bits/nulltoempty.c | 2 +- .../{safemacros.internal.h => safemacros.h} | 0 libc/bits/unsignedsubtract.c | 2 +- libc/bits/xmmintrin.internal.h | 1 - libc/calls/access.c | 1 + libc/calls/chdir.c | 1 + libc/calls/chmod.c | 13 +- libc/calls/chown.c | 1 + libc/calls/clock.c | 1 + libc/calls/clock_gettime.c | 3 +- libc/calls/close.c | 4 + libc/calls/commandv.c | 2 +- libc/calls/copyfd.c | 2 +- libc/calls/creat.c | 6 +- libc/calls/dprintf.c | 2 +- libc/calls/dup-nt.c | 4 +- libc/calls/dup.c | 1 + libc/calls/dup2.c | 1 + libc/calls/dup3.c | 2 + libc/calls/faccessat.c | 1 + libc/calls/fadvise.c | 1 + libc/calls/fallocate.c | 1 + libc/calls/fchmod.c | 1 + libc/calls/fchown.c | 1 + libc/calls/fchownat.c | 1 + libc/calls/fcntl.c | 1 + libc/calls/fdatasync.c | 1 + libc/calls/fstat-nt.c | 2 +- libc/calls/g_fds.c | 4 +- libc/calls/g_ntstartupinfo.c | 6 +- libc/calls/g_ntsysteminfo.c | 6 +- libc/calls/g_sighandrvas.c | 2 +- libc/calls/getenv.c | 2 +- libc/calls/hefty/filecmp.c | 2 +- libc/calls/hefty/get_current_dir_name.c | 2 +- libc/calls/hefty/ntspawn.c | 2 +- libc/calls/hefty/replaceuser.c | 2 +- libc/calls/internal.h | 1 - libc/calls/isdebuggerpresent.c | 2 +- libc/calls/kemptyfd.c | 2 +- libc/calls/kntprioritycombos.c | 4 +- libc/calls/kntprioritycombos.internal.h | 4 +- 
libc/calls/mprotect.greg.c | 3 +- libc/calls/nice.c | 2 +- libc/calls/now.c | 2 +- libc/calls/ntmagicpaths.c | 2 +- libc/calls/ntmagicpaths.internal.h | 2 +- libc/calls/ntsetprivilege.c | 16 +- libc/calls/preadv.c | 2 +- libc/calls/pwritev.c | 2 +- libc/calls/sched_setaffinity.c | 2 +- libc/calls/setpriority-nt.c | 2 +- libc/calls/sysinfo.c | 2 +- libc/calls/termios.internal.h | 2 +- libc/calls/zygote.c | 2 +- libc/dns/dnsnamecmp.c | 2 +- libc/dns/getaddrinfo.c | 2 +- libc/dns/gethoststxt.c | 2 +- libc/elf/elf.h | 2 +- libc/fmt/kerrnonames.S | 2 - libc/fmt/kmonthyearday.c | 2 +- libc/fmt/palandprintf.c | 36 +- libc/fmt/palandprintf.internal.h | 8 +- libc/fmt/strerror_r.c | 2 +- libc/fmt/vsnprintf.c | 2 +- libc/linux/mmap.h | 14 +- libc/log/asan.c | 25 +- libc/log/attachdebugger.c | 2 +- libc/log/backtrace2.c | 2 +- libc/log/cancolor.c | 6 +- libc/log/checkfail.c | 2 +- libc/log/commandvenv.c | 2 +- libc/log/die.c | 4 +- libc/log/gdb.h | 8 +- libc/log/gdbexec.c | 2 +- libc/log/gdbsync.c | 2 +- libc/log/getttysize.c | 2 +- libc/log/isterminalinarticulate.c | 2 +- libc/log/oncrash.c | 4 +- libc/math/fmax.c | 13 - libc/math/fmaxf.c | 13 - libc/math/fmaxl.c | 20 - libc/math/fmin.c | 13 - libc/math/fminf.c | 13 - libc/math/fminl.c | 20 - libc/mem/balloc.c | 7 +- libc/mem/bfree.c | 6 +- libc/mem/cxx/free.S | 6 +- libc/mem/cxx/malloc.S | 6 +- libc/mem/cxx/memalign.S | 8 +- libc/mem/internal.h | 2 +- libc/mem/realloc.S | 12 +- libc/mem/wcsdup.c | 3 + libc/nexgen32e/bsf.c | 17 + libc/nexgen32e/bsfl.c | 17 + libc/nexgen32e/bsfll.c | 17 + libc/nexgen32e/bsr.c | 17 + libc/nexgen32e/bsrl.c | 17 + libc/nexgen32e/bsrll.c | 17 + libc/nexgen32e/bsrmax.S | 21 +- libc/nexgen32e/crc32c-pure.c | 3 +- libc/nexgen32e/crc32init.S | 16 +- libc/nexgen32e/crc32z.c | 2 +- libc/nexgen32e/ffs.S | 7 +- libc/nexgen32e/ffsl.S | 7 +- libc/nexgen32e/kcp437.S | 64 +- libc/nexgen32e/ksigsetempty.S | 2 - libc/nexgen32e/ksigsetfull.S | 2 - libc/nexgen32e/kstarttsc.S | 2 + libc/nexgen32e/lzcnt.S | 7 +- 
libc/nexgen32e/lzcntl.S | 7 +- libc/nexgen32e/strstr-sse42.S | 7 +- libc/nexgen32e/strstr16-sse42.S | 7 +- libc/nexgen32e/tzcnt.S | 7 +- libc/nexgen32e/tzcntl.S | 7 +- libc/nt/nt/loader.h | 2 - libc/rand/g_rando32.c | 8 +- libc/rand/g_rando64.c | 6 +- libc/rand/getrandom.c | 2 +- libc/rand/rand32.c | 3 +- libc/rand/rand64.c | 3 +- libc/runtime/closesymboltable.c | 2 +- libc/runtime/directmap.c | 14 +- libc/runtime/directmap.h | 4 +- libc/runtime/directmapnt.c | 4 +- libc/runtime/ezmap.c | 15 +- libc/runtime/{ezmap.h => ezmap.internal.h} | 6 +- libc/runtime/ftrace.greg.c | 17 +- libc/runtime/getdosargv.c | 2 +- libc/runtime/grow.c | 34 +- libc/runtime/internal.h | 2 +- libc/runtime/interruptiblecall.c | 2 +- libc/runtime/isheap.c | 3 +- libc/runtime/mapanon.c | 10 + libc/runtime/mapelfread.c | 4 +- libc/runtime/mmap.c | 2 +- libc/runtime/ntgetmodule.c | 37 - libc/runtime/piro.c | 2 +- libc/runtime/ring.h | 17 - libc/runtime/ringalloc.c | 75 - libc/runtime/symbols.internal.h | 2 +- libc/runtime/unsetenv.c | 2 +- libc/sock/closesocket-nt.c | 2 +- libc/sock/kntwsadata.c | 2 +- libc/sock/sendfile.c | 2 +- libc/stdio/fscanf.c | 6 +- libc/stdio/g_stdbuf.c | 4 +- libc/stdio/g_stdio.c | 2 +- libc/stdio/printf.c | 34 +- libc/str/ispunct.c | 2 +- libc/str/kx86processormodels.c | 4 +- libc/str/sha256.c | 2 +- libc/str/strlcat.c | 2 +- libc/str/strlcpy.c | 2 +- libc/str/tpdecode.ncabi.c | 1 + libc/str/tpencode.ncabi.c | 1 + libc/sysv/systemfive.S | 9 +- libc/testlib/comborunner.c | 2 +- libc/testlib/formatstr.c | 2 +- libc/testlib/showerror_.c | 2 +- libc/testlib/testmain.c | 2 +- libc/testlib/testmem.c | 2 +- libc/time/localtime.c | 332 +- libc/time/time.h | 10 +- libc/tinymath/atanl.S | 7 +- libc/tinymath/copysign.S | 10 +- libc/tinymath/fabs.S | 4 + libc/tinymath/fmax.S | 29 - libc/{calls/fdkind.c => tinymath/fmax.c} | 20 +- libc/tinymath/fmaxf.S | 29 - libc/tinymath/{fminl.S => fmaxf.c} | 40 +- libc/tinymath/fmaxl.S | 40 - libc/tinymath/fmaxl.c | 36 + libc/tinymath/fmin.S | 
29 - .../ringalloc_test.c => libc/tinymath/fmin.c | 40 +- libc/tinymath/fminf.S | 29 - libc/{runtime/ringfree.c => tinymath/fminf.c} | 21 +- libc/tinymath/fminl.c | 36 + libc/unicode/strnwidth16.c | 2 +- libc/unicode/strwidth16.c | 2 +- libc/x/xjoinpaths.c | 2 +- libc/x/xstrcat.c | 2 +- libc/zipos/parseuri.c | 2 +- libc/zipos/read.c | 2 +- libc/zipos/stat-impl.c | 2 +- test/libc/bits/unsignedsubtract_test.c | 2 +- test/libc/calls/fallocate_test.c | 2 +- test/libc/fmt/palandprintf_test.c | 2 +- test/libc/fmt/sprintf_s_test.c | 2 +- test/libc/mem/malloc_test.c | 2 +- test/libc/nexgen32e/lz4decode_test.c | 4 +- test/libc/nexgen32e/memmove_test.c | 2 +- test/libc/sock/inet_pton_test.c | 2 +- test/libc/str/sigset_test.c | 2 +- test/libc/str/tpdecode_test.c | 2 +- test/libc/str/tpencode_test.c | 2 +- test/tool/build/lib/asmdown_test.c | 135 + test/tool/build/lib/javadown_test.c | 50 + test/tool/viz/lib/test.mk | 3 +- third_party/avir/LICENSE | 26 - third_party/avir/README.cosmo | 5 - third_party/avir/README.md | 367 - third_party/avir/avir.h | 17065 ---------------- third_party/avir/avir.mk | 71 - third_party/avir/avir1.h | 17 - third_party/avir/avir_dil.h | 1013 - third_party/avir/avir_float4_sse.h | 324 - third_party/avir/avir_float8_avx.h | 365 - third_party/avir/lancir.h | 1494 -- third_party/avir/lanczos.cc | 40 - third_party/avir/lanczos.h | 13 - third_party/avir/lanczos1.cc | 77 - third_party/avir/lanczos1.h | 18 - third_party/avir/lanczos1.hpp | 11 - third_party/avir/lanczos1b.cc | 31 - third_party/avir/lanczos1b.h | 11 - third_party/avir/lanczos1f.cc | 63 - third_party/avir/lanczos1f.h | 18 - third_party/avir/lanczos1f.hpp | 11 - third_party/avir/lanczos3.cc | 30 - third_party/avir/notice.h | 11 - third_party/avir/resize.cc | 48 - third_party/avir/resize.h | 17 - third_party/chibicc/as.c | 56 +- third_party/chibicc/chibicc.c | 45 +- third_party/chibicc/chibicc.h | 9 +- third_party/chibicc/chibicc.mk | 9 - third_party/chibicc/codegen.c | 78 +- 
third_party/chibicc/dox1.c | 155 +- third_party/chibicc/dox2.c | 604 +- third_party/chibicc/parse.c | 83 +- third_party/chibicc/printast.c | 8 +- third_party/chibicc/test/builtin_test.c | 28 + third_party/chibicc/test/test.mk | 1 + third_party/compiler_rt/comparedf2.c | 7 +- third_party/compiler_rt/comparesf2.c | 7 +- third_party/compiler_rt/comparetf2.c | 10 +- third_party/compiler_rt/int_math.h | 2 +- third_party/compiler_rt/int_types.h | 6 +- third_party/compiler_rt/udivmodti4.c | 2 +- third_party/dlmalloc/dlindependent_calloc.c | 53 +- third_party/dlmalloc/dlmalloc.c | 6 +- third_party/dlmalloc/malloc_trim.c | 4 +- third_party/dlmalloc/mallopt.c | 8 +- third_party/gdtoa/README | 4 +- third_party/gdtoa/dmisc.c | 27 +- third_party/gdtoa/dtoa.c | 10 +- third_party/gdtoa/g_Qfmt.c | 120 - third_party/gdtoa/g_Qfmt_p.c | 133 - third_party/gdtoa/g__fmt.c | 22 +- third_party/gdtoa/g_ddfmt.c | 6 +- third_party/gdtoa/g_ddfmt_p.c | 16 +- third_party/gdtoa/g_dfmt.c | 6 +- third_party/gdtoa/g_dfmt_p.c | 12 +- third_party/gdtoa/g_ffmt.c | 6 +- third_party/gdtoa/g_ffmt_p.c | 10 +- third_party/gdtoa/g_xLfmt.c | 114 - third_party/gdtoa/g_xLfmt_p.c | 126 - third_party/gdtoa/g_xfmt.c | 6 +- third_party/gdtoa/g_xfmt_p.c | 16 +- third_party/gdtoa/gdtoa.c | 17 +- third_party/gdtoa/gdtoa.internal.h | 234 +- third_party/gdtoa/gethex.c | 9 +- third_party/gdtoa/gmisc.c | 12 +- third_party/gdtoa/hd_init.c | 16 +- third_party/gdtoa/hexnan.c | 15 +- third_party/gdtoa/misc.c | 137 +- third_party/gdtoa/printf.c.txt | 10 - third_party/gdtoa/printf.c0 | 1635 -- third_party/gdtoa/smisc.c | 39 +- third_party/gdtoa/stdio1.h.txt | 106 - third_party/gdtoa/strtoIQ.c | 67 - third_party/gdtoa/strtoId.c | 6 +- third_party/gdtoa/strtoIdd.c | 6 +- third_party/gdtoa/strtoIf.c | 6 +- third_party/gdtoa/strtoIg.c | 6 +- third_party/gdtoa/strtoIx.c | 6 +- third_party/gdtoa/strtoIxL.c | 66 - third_party/gdtoa/strtod.c | 18 +- third_party/gdtoa/strtodI.c | 12 +- third_party/gdtoa/strtodg.c | 50 +- 
third_party/gdtoa/strtodnrp.c | 6 +- third_party/gdtoa/strtof.c | 6 +- third_party/gdtoa/strtopQ.c | 110 - third_party/gdtoa/strtopd.c | 6 +- third_party/gdtoa/strtopdd.c | 6 +- third_party/gdtoa/strtopf.c | 6 +- third_party/gdtoa/strtopx.c | 18 +- third_party/gdtoa/strtopxL.c | 100 - third_party/gdtoa/strtorQ.c | 120 - third_party/gdtoa/strtord.c | 18 +- third_party/gdtoa/strtordd.c | 18 +- third_party/gdtoa/strtorf.c | 16 +- third_party/gdtoa/strtorx.c | 24 +- third_party/gdtoa/strtorxL.c | 111 - third_party/gdtoa/sum.c | 6 +- third_party/gdtoa/ulp.c | 9 +- third_party/getopt/getopt.c | 194 +- third_party/regex/regerror.c | 2 +- third_party/regex/regexec.c | 2 +- third_party/third_party.mk | 1 - tool/build/blinkenlights.c | 2 +- tool/build/helpop.c | 2 +- tool/build/lib/asmdown.c | 169 + tool/build/lib/asmdown.h | 24 + tool/build/lib/demangle.c | 2 +- tool/build/lib/dis.c | 2 +- tool/build/lib/interner.c | 2 +- tool/build/lib/javadown.c | 2 +- tool/build/lib/panel.c | 2 +- tool/build/lib/pty.c | 2 +- tool/build/lib/xmmtype.c | 8 + tool/build/lz4toasm.c | 2 +- tool/build/mkdeps.c | 4 +- tool/build/package.c | 2 +- tool/build/refactor.c | 2 +- tool/build/runit.c | 2 +- tool/build/runitd.c | 2 +- tool/build/zipobj.c | 2 +- tool/calc/calc.c | 2 +- tool/decode/elf.c | 2 +- tool/decode/lib/asmcodegen.c | 2 +- tool/decode/macho.c | 2 +- tool/decode/mkcombos.c | 2 +- tool/decode/mkwides.c | 2 +- tool/decode/pe2.c | 2 +- tool/decode/zip.c | 2 +- tool/net/echoserver.c | 2 +- tool/net/greenbean.c | 2 +- tool/net/redbean.c | 2 +- tool/viz/basicidea.c | 2 +- tool/viz/deathstar.c | 2 +- tool/viz/derasterize.c | 1 - tool/viz/fold.c | 2 +- tool/viz/generatematrix.c | 2 +- tool/viz/lib/formatstringtable-assembly.c | 2 +- tool/viz/lib/formatstringtable-code.c | 2 +- tool/viz/lib/formatstringtable.c | 2 +- {libc/str => tool/viz/lib}/knobs.c | 0 tool/viz/lib/vizlib.mk | 1 - tool/viz/lib/ycbcr2rgb3.c | 1 - tool/viz/life.c | 2 +- tool/viz/magikarp.c | 33 - tool/viz/memzoom.c | 2 +- 
tool/viz/printimage.c | 2 +- tool/viz/printpeb.c | 2 +- tool/viz/printvideo.c | 3 +- tool/viz/resize.c | 1 - tool/viz/tabalign.c | 2 +- tool/viz/tailf.c | 2 +- tool/viz/viz.mk | 1 - 367 files changed, 2542 insertions(+), 26178 deletions(-) delete mode 100644 ape/lib/pic.c delete mode 100644 libc/bits/mmintrin.internal.h rename libc/bits/{safemacros.internal.h => safemacros.h} (100%) delete mode 100644 libc/math/fmax.c delete mode 100644 libc/math/fmaxf.c delete mode 100644 libc/math/fmaxl.c delete mode 100644 libc/math/fmin.c delete mode 100644 libc/math/fminf.c delete mode 100644 libc/math/fminl.c rename libc/runtime/{ezmap.h => ezmap.internal.h} (64%) delete mode 100644 libc/runtime/ntgetmodule.c delete mode 100644 libc/runtime/ring.h delete mode 100644 libc/runtime/ringalloc.c delete mode 100644 libc/tinymath/fmax.S rename libc/{calls/fdkind.c => tinymath/fmax.c} (80%) delete mode 100644 libc/tinymath/fmaxf.S rename libc/tinymath/{fminl.S => fmaxf.c} (74%) delete mode 100644 libc/tinymath/fmaxl.S create mode 100644 libc/tinymath/fmaxl.c delete mode 100644 libc/tinymath/fmin.S rename test/libc/runtime/ringalloc_test.c => libc/tinymath/fmin.c (74%) delete mode 100644 libc/tinymath/fminf.S rename libc/{runtime/ringfree.c => tinymath/fminf.c} (81%) create mode 100644 libc/tinymath/fminl.c create mode 100644 test/tool/build/lib/asmdown_test.c delete mode 100644 third_party/avir/LICENSE delete mode 100644 third_party/avir/README.cosmo delete mode 100644 third_party/avir/README.md delete mode 100644 third_party/avir/avir.h delete mode 100644 third_party/avir/avir.mk delete mode 100644 third_party/avir/avir1.h delete mode 100644 third_party/avir/avir_dil.h delete mode 100644 third_party/avir/avir_float4_sse.h delete mode 100644 third_party/avir/avir_float8_avx.h delete mode 100644 third_party/avir/lancir.h delete mode 100644 third_party/avir/lanczos.cc delete mode 100644 third_party/avir/lanczos.h delete mode 100644 third_party/avir/lanczos1.cc delete mode 100644 
third_party/avir/lanczos1.h delete mode 100644 third_party/avir/lanczos1.hpp delete mode 100644 third_party/avir/lanczos1b.cc delete mode 100644 third_party/avir/lanczos1b.h delete mode 100644 third_party/avir/lanczos1f.cc delete mode 100644 third_party/avir/lanczos1f.h delete mode 100644 third_party/avir/lanczos1f.hpp delete mode 100644 third_party/avir/lanczos3.cc delete mode 100644 third_party/avir/notice.h delete mode 100644 third_party/avir/resize.cc delete mode 100644 third_party/avir/resize.h delete mode 100644 third_party/gdtoa/g_Qfmt.c delete mode 100644 third_party/gdtoa/g_Qfmt_p.c delete mode 100644 third_party/gdtoa/g_xLfmt.c delete mode 100644 third_party/gdtoa/g_xLfmt_p.c delete mode 100644 third_party/gdtoa/printf.c.txt delete mode 100644 third_party/gdtoa/printf.c0 delete mode 100644 third_party/gdtoa/stdio1.h.txt delete mode 100644 third_party/gdtoa/strtoIQ.c delete mode 100644 third_party/gdtoa/strtoIxL.c delete mode 100644 third_party/gdtoa/strtopQ.c delete mode 100644 third_party/gdtoa/strtopxL.c delete mode 100644 third_party/gdtoa/strtorQ.c delete mode 100644 third_party/gdtoa/strtorxL.c create mode 100644 tool/build/lib/asmdown.c create mode 100644 tool/build/lib/asmdown.h rename {libc/str => tool/viz/lib}/knobs.c (100%) diff --git a/Makefile b/Makefile index 19c6794f..06465884 100644 --- a/Makefile +++ b/Makefile @@ -140,7 +140,6 @@ include net/http/http.mk #─┘ include third_party/lemon/lemon.mk include third_party/duktape/duktape.mk include third_party/regex/regex.mk -include third_party/avir/avir.mk include third_party/ctags/ctags.mk include third_party/third_party.mk include libc/testlib/testlib.mk @@ -313,9 +312,8 @@ o/cosmopolitan.h: \ $(foreach x,$(COSMOPOLITAN_HEADERS),$($(x)_HDRS)) @ACTION=ROLLUP TARGET=$@ build/do $^ >$@ -o/cosmopolitan.html: \ - o//third_party/chibicc/chibicc.com.dbg - o//third_party/chibicc/chibicc.com.dbg -J -fno-common -include libc/integral/normalize.inc -o $@ $(filter %.c,$(foreach 
x,$(COSMOPOLITAN_OBJECTS),$($(x)_SRCS))) +o/cosmopolitan.html: o/$(MODE)/third_party/chibicc/chibicc.com.dbg + o/$(MODE)/third_party/chibicc/chibicc.com.dbg -J -fno-common -include libc/integral/normalize.inc -o $@ $(filter-out %.s,$(foreach x,$(COSMOPOLITAN_OBJECTS),$($(x)_SRCS))) # UNSPECIFIED PREREQUISITES TUTORIAL # diff --git a/ape/lib/pic.c b/ape/lib/pic.c deleted file mode 100644 index 2ca33fdb..00000000 --- a/ape/lib/pic.c +++ /dev/null @@ -1,140 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "ape/lib/pc.h" - -#define ICW1_ICW4 0x01 /* ICW4 (not) needed */ -#define ICW1_SINGLE 0x02 /* Single (cascade) mode */ -#define ICW1_INTERVAL4 0x04 /* Call address interval 4 (8) */ -#define ICW1_LEVEL 0x08 /* Level triggered (edge) mode */ -#define ICW1_INIT 0x10 /* Initialization - required! 
*/ - -#define ICW4_8086 0x01 /* 8086/88 (MCS-80/85) mode */ -#define ICW4_AUTO 0x02 /* Auto (normal) EOI */ -#define ICW4_BUF_SLAVE 0x08 /* Buffered mode/slave */ -#define ICW4_BUF_MASTER 0x0C /* Buffered mode/master */ -#define ICW4_SFNM 0x10 /* Special fully nested (not) */ - -static inline void io_wait(void) { - /* Magic technique from Linux, according to: - * wiki.osdev.org/index.php?title=Inline_Assembly/Examples&oldid=23541 - */ - outb(0x80, 0); -} - -void PIC_sendEOI(unsigned char irq) { - if (irq >= 8) outb(PIC2_CMD, PIC_EOI); - outb(PIC1_CMD, PIC_EOI); -} - -bool AreInterruptsEnabled() { - return (eflags() & kInterruptFlag) == kInterruptFlag; -} - -nodiscard forceinline unsigned long irqdisable(void) { - unsigned long eflags; - asm("pushf\n\t" - "cli\n\t" - "pop\t%0" - : "=r"(eflags) - : /* no inputs */ - : "cc"); - return eflags; -} - -forceinline void irqrestore(unsigned long eflags) { - asm volatile( - "push\t%0\n\t" - "popf" - : /* no outputs */ - : "rm"(eflags) - : "cc"); -} - -/** - * @param offset1 is vector offset for master PIC - * vectors on the master become offset1..offset1+7 - * @param offset2 is same for slave PIC: offset2..offset2+7 - **/ -void PIC_remap(int offset1, int offset2) { - unsigned char a1, a2; - a1 = inb(PIC1_DATA); // save masks - a2 = inb(PIC2_DATA); - outb(PIC1_CMD, - ICW1_INIT | - ICW1_ICW4); // starts the initialization sequence (in cascade mode) - io_wait(); - outb(PIC2_CMD, ICW1_INIT | ICW1_ICW4); - io_wait(); - outb(PIC1_DATA, offset1); // ICW2: Master PIC vector offset - io_wait(); - outb(PIC2_DATA, offset2); // ICW2: Slave PIC vector offset - io_wait(); - outb(PIC1_DATA, 4); // ICW3: tell Master PIC that there is a slave PIC at - // IRQ2 (0000 0100) - io_wait(); - outb(PIC2_DATA, 2); // ICW3: tell Slave PIC its cascade identity (0000 0010) - io_wait(); - outb(PIC1_DATA, ICW4_8086); - io_wait(); - outb(PIC2_DATA, ICW4_8086); - io_wait(); - outb(PIC1_DATA, a1); // restore saved masks. 
- outb(PIC2_DATA, a2); -} - -void IRQ_set_mask(unsigned char IRQline) { - uint16_t port; - uint8_t value; - if (IRQline < 8) { - port = PIC1_DATA; - } else { - port = PIC2_DATA; - IRQline -= 8; - } - value = inb(port) | (1 << IRQline); - outb(port, value); -} - -void IRQ_clear_mask(unsigned char IRQline) { - uint16_t port; - uint8_t value; - if (IRQline < 8) { - port = PIC1_DATA; - } else { - port = PIC2_DATA; - IRQline -= 8; - } - value = inb(port) & ~(1 << IRQline); - outb(port, value); -} - -static uint16_t __pic_get_irq_reg(int ocw3) { - /* OCW3 to PIC CMD to get the register values. PIC2 is chained, and - * represents IRQs 8-15. PIC1 is IRQs 0-7, with 2 being the chain */ - outb(PIC1_CMD, ocw3); - outb(PIC2_CMD, ocw3); - return (inb(PIC2_CMD) << 8) | inb(PIC1_CMD); -} - -/* Returns the combined value of the cascaded PICs irq request register */ -uint16_t pic_get_irr(void) { return __pic_get_irq_reg(PIC_READ_IRR); } - -/* Returns the combined value of the cascaded PICs in-service register */ -uint16_t pic_get_isr(void) { return __pic_get_irq_reg(PIC_READ_ISR); } diff --git a/build/definitions.mk b/build/definitions.mk index feafcd24..b8e87b44 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -126,7 +126,6 @@ TRADITIONAL = \ DEFAULT_CCFLAGS = \ -Wall \ -Werror \ - -fmerge-all-constants \ -fdebug-prefix-map="$(PWD)"= \ -frecord-gcc-switches @@ -142,7 +141,6 @@ DEFAULT_COPTS = \ -fno-ident \ -fno-common \ -fno-gnu-unique \ - -fmerge-constants \ -fstrict-aliasing \ -fstrict-overflow \ -fno-omit-frame-pointer \ diff --git a/dsp/core/scalevolume.c b/dsp/core/scalevolume.c index c7f9e656..a6bedb97 100644 --- a/dsp/core/scalevolume.c +++ b/dsp/core/scalevolume.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/core/core.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/limits.h" /** diff --git a/dsp/tty/ident.c 
b/dsp/tty/ident.c index 988c83ff..ae986a69 100644 --- a/dsp/tty/ident.c +++ b/dsp/tty/ident.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/tty/tty.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/calls/termios.h" diff --git a/dsp/tty/ttymove.c b/dsp/tty/ttymove.c index 549d0735..d685624b 100644 --- a/dsp/tty/ttymove.c +++ b/dsp/tty/ttymove.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/tty/itoa8.h" #include "dsp/tty/tty.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/limits.h" #include "libc/log/check.h" diff --git a/dsp/tty/ttyraster.c b/dsp/tty/ttyraster.c index aa1d36cb..4444f494 100644 --- a/dsp/tty/ttyraster.c +++ b/dsp/tty/ttyraster.c @@ -24,7 +24,7 @@ #include "dsp/tty/windex.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/limits.h" #include "libc/log/check.h" #include "libc/log/log.h" diff --git a/examples/cplusplus.cc b/examples/cplusplus.cc index c523eb10..a5b1575a 100644 --- a/examples/cplusplus.cc +++ b/examples/cplusplus.cc @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" diff --git a/examples/lstime.c b/examples/lstime.c index ffeebfd3..ef1cf5ab 100644 --- a/examples/lstime.c +++ b/examples/lstime.c @@ -9,7 +9,7 @@ #endif #include "libc/alg/alg.h" #include "libc/alg/arraylist2.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include 
"libc/calls/calls.h" #include "libc/calls/struct/dirent.h" #include "libc/calls/struct/stat.h" diff --git a/examples/nesemu1.cc b/examples/nesemu1.cc index ec658610..991b6472 100644 --- a/examples/nesemu1.cc +++ b/examples/nesemu1.cc @@ -13,7 +13,7 @@ #include "libc/alg/arraylist2.internal.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/calls/struct/itimerval.h" diff --git a/libc/alg/qsort.c b/libc/alg/qsort.c index 1083e6fc..9770a5e6 100644 --- a/libc/alg/qsort.c +++ b/libc/alg/qsort.c @@ -226,6 +226,7 @@ static noinline void smoothsort( * @param width is the size of each item * @param cmp is a callback returning <0, 0, or >0 * @param arg will optionally be passed as the third argument to cmp + * @see qsort() */ void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) { unsigned lp[12 * sizeof(unsigned)]; @@ -235,7 +236,12 @@ void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) { /** * Sorts array. 
- * @see qsort_r() for further details + * + * @param base points to an array to sort in-place + * @param count is the item count + * @param width is the size of each item + * @param cmp is a callback returning <0, 0, or >0 + * @see qsort_r() */ void qsort(void *base, size_t count, size_t width, int cmp(const void *, const void *)) { diff --git a/libc/alg/replacestr.c b/libc/alg/replacestr.c index fd9ec2fc..65ced5ba 100644 --- a/libc/alg/replacestr.c +++ b/libc/alg/replacestr.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" #include "libc/alg/arraylist2.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" #include "libc/sysv/errfuns.h" diff --git a/libc/alg/replacestr16.c b/libc/alg/replacestr16.c index dc875764..f88d4b3e 100644 --- a/libc/alg/replacestr16.c +++ b/libc/alg/replacestr16.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" #include "libc/alg/arraylist2.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" #include "libc/sysv/errfuns.h" diff --git a/libc/bits/bits.h b/libc/bits/bits.h index f87dddfc..02874e7f 100644 --- a/libc/bits/bits.h +++ b/libc/bits/bits.h @@ -166,7 +166,7 @@ unsigned long hamming(unsigned long, unsigned long) pureconst; │ cosmopolitan § bits » some assembly required ─╬─│┼ ╚────────────────────────────────────────────────────────────────────────────│*/ -/** +/* * Constraints for virtual machine flags. 
* @note we beseech clang devs for flag constraints */ diff --git a/libc/bits/emptytonull.c b/libc/bits/emptytonull.c index 0c70c0ba..0648f07f 100644 --- a/libc/bits/emptytonull.c +++ b/libc/bits/emptytonull.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" /** * Returns string where empty string is made null. diff --git a/libc/bits/mmintrin.internal.h b/libc/bits/mmintrin.internal.h deleted file mode 100644 index 63fcc4fb..00000000 --- a/libc/bits/mmintrin.internal.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_ -#define COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -typedef long long __m64 _Vector_size(8); -typedef float __v2sf _Vector_size(8); -typedef int __v2si _Vector_size(8); -typedef short __v4hi _Vector_size(8); -typedef char __v8qi _Vector_size(8); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_ */ diff --git a/libc/bits/nulltoempty.c b/libc/bits/nulltoempty.c index df5087db..cb7e95cf 100644 --- a/libc/bits/nulltoempty.c +++ b/libc/bits/nulltoempty.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" /** * Returns 𝑠 converting null to empty string. 
diff --git a/libc/bits/safemacros.internal.h b/libc/bits/safemacros.h similarity index 100% rename from libc/bits/safemacros.internal.h rename to libc/bits/safemacros.h diff --git a/libc/bits/unsignedsubtract.c b/libc/bits/unsignedsubtract.c index 75788501..e8ae3712 100644 --- a/libc/bits/unsignedsubtract.c +++ b/libc/bits/unsignedsubtract.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" /** * Subtracts unsigned integers w/ wraparound. diff --git a/libc/bits/xmmintrin.internal.h b/libc/bits/xmmintrin.internal.h index 45786c88..8d7c9d08 100644 --- a/libc/bits/xmmintrin.internal.h +++ b/libc/bits/xmmintrin.internal.h @@ -1,7 +1,6 @@ #ifndef COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_ #define COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_ #include "libc/bits/emmintrin.internal.h" -#include "libc/bits/mmintrin.internal.h" #include "libc/bits/progn.internal.h" #include "libc/dce.h" diff --git a/libc/calls/access.c b/libc/calls/access.c index 02a584bf..5980a3d6 100644 --- a/libc/calls/access.c +++ b/libc/calls/access.c @@ -30,6 +30,7 @@ * @param mode can be R_OK, W_OK, X_OK, F_OK * @return 0 if ok, or -1 and sets errno * @asyncsignalsafe + * @syscall */ int access(const char *path, int mode) { char16_t path16[PATH_MAX]; diff --git a/libc/calls/chdir.c b/libc/calls/chdir.c index 741acd40..0dbec8e3 100644 --- a/libc/calls/chdir.c +++ b/libc/calls/chdir.c @@ -25,6 +25,7 @@ /** * Sets current directory. 
* @asyncsignalsafe + * @syscall */ int chdir(const char *path) { if (!path) return efault(); diff --git a/libc/calls/chmod.c b/libc/calls/chmod.c index 2b6a4a18..cf70a288 100644 --- a/libc/calls/chmod.c +++ b/libc/calls/chmod.c @@ -26,15 +26,15 @@ /** * Changes permissions on file, e.g.: * - * CHECK_NE(-1, chmod("foo/bar.txt", 0644)); - * CHECK_NE(-1, chmod("o/default/program.com", 0755)); - * CHECK_NE(-1, chmod("privatefolder/", 0700)); + * CHECK_NE(-1, chmod("foo/bar.txt", 0644)); + * CHECK_NE(-1, chmod("o/default/program.com", 0755)); + * CHECK_NE(-1, chmod("privatefolder/", 0700)); * * The esoteric bits generally available on System Five are: * - * CHECK_NE(-1, chmod("/opt/", 01000)); // sticky bit - * CHECK_NE(-1, chmod("/usr/bin/sudo", 04755)); // setuid bit - * CHECK_NE(-1, chmod("/usr/bin/wall", 02755)); // setgid bit + * CHECK_NE(-1, chmod("/opt/", 01000)); // sticky bit + * CHECK_NE(-1, chmod("/usr/bin/sudo", 04755)); // setuid bit + * CHECK_NE(-1, chmod("/usr/bin/wall", 02755)); // setgid bit * * This works on Windows NT if you ignore the error ;-) * @@ -43,6 +43,7 @@ * @errors ENOENT, ENOTDIR, ENOSYS * @asyncsignalsafe * @see fchmod() + * @syscall */ int chmod(const char *pathname, uint32_t mode) { if (!pathname) return efault(); diff --git a/libc/calls/chown.c b/libc/calls/chown.c index 2f222106..0b317d72 100644 --- a/libc/calls/chown.c +++ b/libc/calls/chown.c @@ -33,6 +33,7 @@ * @see /etc/passwd for user ids * @see /etc/group for group ids * @asyncsignalsafe + * @syscall */ int chown(const char *pathname, uint32_t uid, uint32_t gid) { if (!pathname) return efault(); diff --git a/libc/calls/clock.c b/libc/calls/clock.c index 80daf04e..b17202c0 100644 --- a/libc/calls/clock.c +++ b/libc/calls/clock.c @@ -25,6 +25,7 @@ * Returns how much CPU program has consumed on time-sharing system. 
* * @return value that can be divided by CLOCKS_PER_SEC, or -1 w/ errno + * @see clock_gettime() */ int64_t clock(void) { struct timespec ts; diff --git a/libc/calls/clock_gettime.c b/libc/calls/clock_gettime.c index bb011355..509b581f 100644 --- a/libc/calls/clock_gettime.c +++ b/libc/calls/clock_gettime.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/timespec.h" @@ -51,6 +51,7 @@ * errno isn't restored to its original value, to detect prec. loss * @see strftime(), gettimeofday() * @asyncsignalsafe + * @syscall */ int clock_gettime(int clockid, struct timespec *out_ts) { /* TODO(jart): Just ignore O/S for MONOTONIC and measure RDTSC on start */ diff --git a/libc/calls/close.c b/libc/calls/close.c index 2754a29c..b75d0882 100644 --- a/libc/calls/close.c +++ b/libc/calls/close.c @@ -28,8 +28,12 @@ /** * Closes file descriptor. * + * This function may be used for file descriptors returned by socket, + * accept, epoll_create, and zipos file descriptors too. 
+ * * @return 0 on success, or -1 w/ errno * @asyncsignalsafe + * @syscall */ int close(int fd) { int rc; diff --git a/libc/calls/commandv.c b/libc/calls/commandv.c index 666c1440..474a8f00 100644 --- a/libc/calls/commandv.c +++ b/libc/calls/commandv.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/errno.h" diff --git a/libc/calls/copyfd.c b/libc/calls/copyfd.c index 0540263f..c13ecaa2 100644 --- a/libc/calls/copyfd.c +++ b/libc/calls/copyfd.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/errno.h" diff --git a/libc/calls/creat.c b/libc/calls/creat.c index ebc5ddec..4789060e 100644 --- a/libc/calls/creat.c +++ b/libc/calls/creat.c @@ -23,12 +23,16 @@ /** * Creates new file, returning open()'d file descriptor. * + * This function is shorthand for: + * + * open(file, O_CREAT | O_WRONLY | O_TRUNC, mode) + * * @param file is a UTF-8 string, which is truncated if it exists * @param mode is an octal user/group/other permission, e.g. 
0755 * @return a number registered with the system to track the open file, * which must be stored using a 64-bit type in order to support both * System V and Windows, and must be closed later on using close() - * @see touch() + * @see open(), touch() * @asyncsignalsafe */ nodiscard int creat(const char *file, uint32_t mode) { diff --git a/libc/calls/dprintf.c b/libc/calls/dprintf.c index aa0f4d0f..ebe3aa54 100644 --- a/libc/calls/dprintf.c +++ b/libc/calls/dprintf.c @@ -20,7 +20,7 @@ #include "libc/calls/calls.h" /** - * Formats string directly to system i/o device. + * Formats string directly to file descriptor. */ int(dprintf)(int fd, const char *fmt, ...) { int rc; diff --git a/libc/calls/dup-nt.c b/libc/calls/dup-nt.c index e21b425b..3de6b05e 100644 --- a/libc/calls/dup-nt.c +++ b/libc/calls/dup-nt.c @@ -43,8 +43,8 @@ textwindows int dup$nt(int oldfd, int newfd, int flags) { return -1; } if (DuplicateHandle(GetCurrentProcess(), g_fds.p[oldfd].handle, - GetCurrentProcess(), &g_fds.p[newfd].handle, 0, - flags & O_CLOEXEC, kNtDuplicateSameAccess)) { + GetCurrentProcess(), &g_fds.p[newfd].handle, 0, true, + kNtDuplicateSameAccess)) { g_fds.p[newfd].kind = g_fds.p[oldfd].kind; g_fds.p[newfd].flags = flags; return newfd; diff --git a/libc/calls/dup.c b/libc/calls/dup.c index 8510d9f4..73fe08e9 100644 --- a/libc/calls/dup.c +++ b/libc/calls/dup.c @@ -27,6 +27,7 @@ * @param fd remains open afterwards * @return some arbitrary new number for fd * @asyncsignalsafe + * @syscall */ nodiscard int dup(int fd) { if (!IsWindows()) { diff --git a/libc/calls/dup2.c b/libc/calls/dup2.c index 6cfc5cf5..84366f00 100644 --- a/libc/calls/dup2.c +++ b/libc/calls/dup2.c @@ -29,6 +29,7 @@ * unless it's equal to oldfd, in which case dup2() is a no-op * @return new file descriptor, or -1 w/ errno * @asyncsignalsafe + * @syscall */ int dup2(int oldfd, int newfd) { if (oldfd == newfd) return newfd; diff --git a/libc/calls/dup3.c b/libc/calls/dup3.c index 292d5608..6d769be7 100644 --- 
a/libc/calls/dup3.c +++ b/libc/calls/dup3.c @@ -33,6 +33,8 @@ * @param newfd if already assigned, is silently closed beforehand; * unless it's equal to oldfd, in which case dup2() is a no-op * @flags can have O_CLOEXEC + * @see dup(), dup2() + * @syscall */ int dup3(int oldfd, int newfd, int flags) { if (oldfd == newfd) return einval(); diff --git a/libc/calls/faccessat.c b/libc/calls/faccessat.c index 2a36c592..ce0c1670 100644 --- a/libc/calls/faccessat.c +++ b/libc/calls/faccessat.c @@ -32,6 +32,7 @@ * @param flags can be R_OK, W_OK, X_OK, F_OK * @return 0 if ok, or -1 and sets errno * @asyncsignalsafe + * @syscall */ int faccessat(int dirfd, const char *path, int mode, uint32_t flags) { if (!path) return efault(); diff --git a/libc/calls/fadvise.c b/libc/calls/fadvise.c index 3aa7a8fb..d4f632c9 100644 --- a/libc/calls/fadvise.c +++ b/libc/calls/fadvise.c @@ -30,6 +30,7 @@ * @param len 0 means ‘til end of file * @param advice can be MADV_SEQUENTIAL, MADV_RANDOM, etc. * @return -1 on error + * @syscall */ int fadvise(int fd, uint64_t offset, uint64_t len, int advice) { if (!IsWindows()) { diff --git a/libc/calls/fallocate.c b/libc/calls/fallocate.c index dcd54b9d..ab090dca 100644 --- a/libc/calls/fallocate.c +++ b/libc/calls/fallocate.c @@ -38,6 +38,7 @@ * @param length is how much physical space to reserve / commit * @return 0 on success, or -1 w/ errno * @see ftruncate() + * @syscall */ int fallocate(int fd, int32_t mode, int64_t offset, int64_t length) { int rc; diff --git a/libc/calls/fchmod.c b/libc/calls/fchmod.c index 58f23ff8..2866f207 100644 --- a/libc/calls/fchmod.c +++ b/libc/calls/fchmod.c @@ -41,6 +41,7 @@ * @errors ENOSYS * @asyncsignalsafe * @see chmod() + * @syscall */ int fchmod(int fd, uint32_t mode) { /* TODO(jart): Windows */ diff --git a/libc/calls/fchown.c b/libc/calls/fchown.c index 728c54a5..51e47ca5 100644 --- a/libc/calls/fchown.c +++ b/libc/calls/fchown.c @@ -29,6 +29,7 @@ * @return 0 on success, or -1 w/ errno * @see /etc/passwd for 
user ids * @see /etc/group for group ids + * @syscall */ int fchown(int fd, uint32_t uid, uint32_t gid) { /* TODO(jart): Windows? */ diff --git a/libc/calls/fchownat.c b/libc/calls/fchownat.c index 98053649..a9139d00 100644 --- a/libc/calls/fchownat.c +++ b/libc/calls/fchownat.c @@ -32,6 +32,7 @@ * @see /etc/passwd for user ids * @see /etc/group for group ids * @asyncsignalsafe + * @syscall */ int fchownat(int dirfd, const char *pathname, uint32_t uid, uint32_t gid, uint32_t flags) { diff --git a/libc/calls/fcntl.c b/libc/calls/fcntl.c index 9e42702a..b3f310c9 100644 --- a/libc/calls/fcntl.c +++ b/libc/calls/fcntl.c @@ -30,6 +30,7 @@ * @param arg can be FD_CLOEXEC, etc. depending * @return 0 on success, or -1 w/ errno * @asyncsignalsafe + * @syscall */ int fcntl(int fd, int cmd, ...) { va_list va; diff --git a/libc/calls/fdatasync.c b/libc/calls/fdatasync.c index 2e4fc806..f70c340f 100644 --- a/libc/calls/fdatasync.c +++ b/libc/calls/fdatasync.c @@ -27,6 +27,7 @@ * @return 0 on success, or -1 w/ errno * @see fsync(), sync_file_range() * @asyncsignalsafe + * @syscall */ int fdatasync(int fd) { if (!IsWindows()) { diff --git a/libc/calls/fstat-nt.c b/libc/calls/fstat-nt.c index f2cf25a0..d90fc049 100644 --- a/libc/calls/fstat-nt.c +++ b/libc/calls/fstat-nt.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/calls/struct/stat.h" #include "libc/fmt/conv.h" diff --git a/libc/calls/g_fds.c b/libc/calls/g_fds.c index 350390b4..974f6059 100644 --- a/libc/calls/g_fds.c +++ b/libc/calls/g_fds.c @@ -25,9 +25,9 @@ STATIC_YOINK("_init_g_fds"); -struct Fds g_fds; +hidden struct Fds g_fds; -void InitializeFileDescriptors(void) { +hidden void InitializeFileDescriptors(void) { struct Fds *fds; fds = VEIL("r", &g_fds); 
pushmov(&fds->f, 3ul); diff --git a/libc/calls/g_ntstartupinfo.c b/libc/calls/g_ntstartupinfo.c index eddb6024..2c4aea03 100644 --- a/libc/calls/g_ntstartupinfo.c +++ b/libc/calls/g_ntstartupinfo.c @@ -21,10 +21,6 @@ #include "libc/nt/startupinfo.h" #include "libc/nt/struct/startupinfo.h" -/** - * GetStartupInfo() singleton. - * @see libc/runtime/winmain.c - */ -struct NtStartupInfo g_ntstartupinfo; +hidden struct NtStartupInfo g_ntstartupinfo; STATIC_YOINK("_init_g_ntstartupinfo"); diff --git a/libc/calls/g_ntsysteminfo.c b/libc/calls/g_ntsysteminfo.c index 77690a40..d72e6ff0 100644 --- a/libc/calls/g_ntsysteminfo.c +++ b/libc/calls/g_ntsysteminfo.c @@ -21,10 +21,6 @@ #include "libc/nt/struct/systeminfo.h" #include "libc/nt/systeminfo.h" -/** - * GetSystemInfo() singleton. - * @see libc/runtime/winmain.c - */ -struct NtSystemInfo g_ntsysteminfo; +hidden struct NtSystemInfo g_ntsysteminfo; STATIC_YOINK("_init_g_ntsysteminfo"); diff --git a/libc/calls/g_sighandrvas.c b/libc/calls/g_sighandrvas.c index fc6f09ee..275e05b8 100644 --- a/libc/calls/g_sighandrvas.c +++ b/libc/calls/g_sighandrvas.c @@ -19,4 +19,4 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/internal.h" -int g_sighandrvas[NSIG]; +hidden int g_sighandrvas[NSIG]; diff --git a/libc/calls/getenv.c b/libc/calls/getenv.c index 47795703..1347c239 100644 --- a/libc/calls/getenv.c +++ b/libc/calls/getenv.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" diff --git a/libc/calls/hefty/filecmp.c b/libc/calls/hefty/filecmp.c index 8fa29399..a9282d99 100644 --- a/libc/calls/hefty/filecmp.c +++ b/libc/calls/hefty/filecmp.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ 
╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/errno.h" #include "libc/str/str.h" diff --git a/libc/calls/hefty/get_current_dir_name.c b/libc/calls/hefty/get_current_dir_name.c index f9d6ebeb..74d68da2 100644 --- a/libc/calls/hefty/get_current_dir_name.c +++ b/libc/calls/hefty/get_current_dir_name.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" diff --git a/libc/calls/hefty/ntspawn.c b/libc/calls/hefty/ntspawn.c index eecddfc0..c3977f49 100644 --- a/libc/calls/hefty/ntspawn.c +++ b/libc/calls/hefty/ntspawn.c @@ -20,7 +20,7 @@ #include "libc/alg/alg.h" #include "libc/alg/arraylist.internal.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/ntspawn.h" #include "libc/calls/internal.h" diff --git a/libc/calls/hefty/replaceuser.c b/libc/calls/hefty/replaceuser.c index 34d479fe..6ca244be 100644 --- a/libc/calls/hefty/replaceuser.c +++ b/libc/calls/hefty/replaceuser.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" diff --git a/libc/calls/internal.h b/libc/calls/internal.h index f52f75cb..a30cce28 100644 --- a/libc/calls/internal.h +++ b/libc/calls/internal.h @@ -68,7 +68,6 @@ 
hidden extern const struct NtSecurityAttributes kNtIsInheritable; ssize_t __getemptyfd(void) hidden; int __ensurefds(int) hidden; void __removefd(int) hidden; -enum FdKind fdkind(int) hidden nosideeffect; bool __isfdopen(int) hidden nosideeffect; bool __isfdkind(int, enum FdKind) hidden nosideeffect; diff --git a/libc/calls/isdebuggerpresent.c b/libc/calls/isdebuggerpresent.c index 0232daf0..846a092f 100644 --- a/libc/calls/isdebuggerpresent.c +++ b/libc/calls/isdebuggerpresent.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/dce.h" diff --git a/libc/calls/kemptyfd.c b/libc/calls/kemptyfd.c index 0b83e2f0..92b59a47 100644 --- a/libc/calls/kemptyfd.c +++ b/libc/calls/kemptyfd.c @@ -19,4 +19,4 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/internal.h" -const struct Fd kEmptyFd; +hidden const struct Fd kEmptyFd; diff --git a/libc/calls/kntprioritycombos.c b/libc/calls/kntprioritycombos.c index 13853576..5080dfaa 100644 --- a/libc/calls/kntprioritycombos.c +++ b/libc/calls/kntprioritycombos.c @@ -25,7 +25,7 @@ #include "libc/nt/enum/processcreationflags.h" #include "libc/nt/enum/threadpriority.h" -const struct NtPriorityCombo kNtPriorityCombos[] = { +hidden const struct NtPriorityCombo kNtPriorityCombos[] = { {-20, ffs(kNtHighPriorityClass), kNtThreadPriorityHighest, 15}, {-18, ffs(kNtHighPriorityClass), kNtThreadPriorityTimeCritical, 15}, {-17, ffs(kNtNormalPriorityClass), kNtThreadPriorityTimeCritical, 15}, @@ -54,4 +54,4 @@ const struct NtPriorityCombo kNtPriorityCombos[] = { {19, ffs(kNtIdlePriorityClass), kNtThreadPriorityIdle, 1}, }; -const unsigned kNtPriorityCombosLen = ARRAYLEN(kNtPriorityCombos); +hidden const unsigned kNtPriorityCombosLen = 
ARRAYLEN(kNtPriorityCombos); diff --git a/libc/calls/kntprioritycombos.internal.h b/libc/calls/kntprioritycombos.internal.h index 2658b14b..147ab867 100644 --- a/libc/calls/kntprioritycombos.internal.h +++ b/libc/calls/kntprioritycombos.internal.h @@ -10,8 +10,8 @@ struct NtPriorityCombo { int8_t prio; }; -extern const unsigned kNtPriorityCombosLen; -extern const struct NtPriorityCombo kNtPriorityCombos[]; +hidden extern const unsigned kNtPriorityCombosLen; +hidden extern const struct NtPriorityCombo kNtPriorityCombos[]; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/calls/mprotect.greg.c b/libc/calls/mprotect.greg.c index 332817fb..8fd83838 100644 --- a/libc/calls/mprotect.greg.c +++ b/libc/calls/mprotect.greg.c @@ -26,6 +26,8 @@ #include "libc/nt/thunk/msabi.h" #include "libc/sysv/consts/nr.h" +extern __msabi typeof(VirtualProtect) *const __imp_VirtualProtect; + /** * Modifies restrictions on virtual memory address range. * @@ -34,7 +36,6 @@ * @see mmap() */ int mprotect(void *addr, uint64_t len, int prot) { - extern __msabi typeof(VirtualProtect) *const __imp_VirtualProtect; bool cf; int64_t rc; uint32_t oldprot; diff --git a/libc/calls/nice.c b/libc/calls/nice.c index 237b743a..1c05bb34 100644 --- a/libc/calls/nice.c +++ b/libc/calls/nice.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/fmt/conv.h" #include "libc/sysv/consts/prio.h" diff --git a/libc/calls/now.c b/libc/calls/now.c index d410bb9d..428db6ad 100644 --- a/libc/calls/now.c +++ b/libc/calls/now.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/initializer.internal.h" -#include "libc/bits/safemacros.internal.h" 
+#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/nexgen32e/rdtsc.h" diff --git a/libc/calls/ntmagicpaths.c b/libc/calls/ntmagicpaths.c index 5792ef65..6f42acfc 100644 --- a/libc/calls/ntmagicpaths.c +++ b/libc/calls/ntmagicpaths.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/ntmagicpaths.internal.h" -const struct NtMagicPaths kNtMagicPaths = { +hidden const struct NtMagicPaths kNtMagicPaths = { #define TAB(NAME, STRING) STRING, #include "libc/calls/ntmagicpaths.inc" #undef TAB diff --git a/libc/calls/ntmagicpaths.internal.h b/libc/calls/ntmagicpaths.internal.h index 20b2be0f..c441843f 100644 --- a/libc/calls/ntmagicpaths.internal.h +++ b/libc/calls/ntmagicpaths.internal.h @@ -9,7 +9,7 @@ struct NtMagicPaths { #undef TAB }; -extern const struct NtMagicPaths kNtMagicPaths; +hidden extern const struct NtMagicPaths kNtMagicPaths; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/calls/ntsetprivilege.c b/libc/calls/ntsetprivilege.c index ede094ea..bdb13aec 100644 --- a/libc/calls/ntsetprivilege.c +++ b/libc/calls/ntsetprivilege.c @@ -17,19 +17,21 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/internal.h" #include "libc/nt/privilege.h" #include "libc/nt/struct/tokenprivileges.h" -#include "libc/calls/internal.h" /** * Sets NT permission thing, e.g. 
* - * int64_t htoken; - * if (OpenProcessToken(GetCurrentProcess(), - * kNtTokenAdjustPrivileges | kNtTokenQuery, &htoken)) { - * ntsetprivilege(htoken, u"SeManageVolumePrivilege", kNtSePrivilegeEnabled); - * CloseHandle(htoken); - * } + * int64_t htoken; + * if (OpenProcessToken(GetCurrentProcess(), + * kNtTokenAdjustPrivileges | kNtTokenQuery, + * &htoken)) { + * ntsetprivilege(htoken, u"SeManageVolumePrivilege", + * kNtSePrivilegeEnabled); + * CloseHandle(htoken); + * } */ textwindows bool32 ntsetprivilege(int64_t token, const char16_t *name, uint32_t attrs) { diff --git a/libc/calls/preadv.c b/libc/calls/preadv.c index 0f0e3a0f..eb0f1b7d 100644 --- a/libc/calls/preadv.c +++ b/libc/calls/preadv.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/iovec.h" diff --git a/libc/calls/pwritev.c b/libc/calls/pwritev.c index 2d98fc21..93efb84a 100644 --- a/libc/calls/pwritev.c +++ b/libc/calls/pwritev.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/iovec.h" diff --git a/libc/calls/sched_setaffinity.c b/libc/calls/sched_setaffinity.c index d6f024dc..97dc2cbb 100644 --- a/libc/calls/sched_setaffinity.c +++ b/libc/calls/sched_setaffinity.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include 
"libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/dce.h" diff --git a/libc/calls/setpriority-nt.c b/libc/calls/setpriority-nt.c index f74b125c..c84dedf2 100644 --- a/libc/calls/setpriority-nt.c +++ b/libc/calls/setpriority-nt.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/calls/kntprioritycombos.internal.h" #include "libc/nt/process.h" diff --git a/libc/calls/sysinfo.c b/libc/calls/sysinfo.c index 0154d9b4..74cad971 100644 --- a/libc/calls/sysinfo.c +++ b/libc/calls/sysinfo.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/sysinfo.h" diff --git a/libc/calls/termios.internal.h b/libc/calls/termios.internal.h index eb567cec..c8e044d0 100644 --- a/libc/calls/termios.internal.h +++ b/libc/calls/termios.internal.h @@ -1,7 +1,7 @@ #ifndef COSMOPOLITAN_LIBC_CALLS_TERMIOS_INTERNAL_H_ #define COSMOPOLITAN_LIBC_CALLS_TERMIOS_INTERNAL_H_ #ifndef __STRICT_ANSI__ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/struct/metatermios.internal.h" #include "libc/calls/struct/termios.h" #include "libc/str/str.h" diff --git a/libc/calls/zygote.c b/libc/calls/zygote.c index 657c57a5..f3c1c6b9 100644 --- a/libc/calls/zygote.c +++ b/libc/calls/zygote.c @@ -20,7 +20,7 @@ #include "libc/calls/internal.h" #include "libc/nt/struct/securityattributes.h" -const struct NtSecurityAttributes kNtIsInheritable = { +hidden const struct NtSecurityAttributes kNtIsInheritable = { sizeof(struct 
NtSecurityAttributes), NULL, true, diff --git a/libc/dns/dnsnamecmp.c b/libc/dns/dnsnamecmp.c index 22638fb4..b044c543 100644 --- a/libc/dns/dnsnamecmp.c +++ b/libc/dns/dnsnamecmp.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/dns/dns.h" #include "libc/str/str.h" diff --git a/libc/dns/getaddrinfo.c b/libc/dns/getaddrinfo.c index 04aa346f..cd214292 100644 --- a/libc/dns/getaddrinfo.c +++ b/libc/dns/getaddrinfo.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/dns/dns.h" #include "libc/dns/hoststxt.h" diff --git a/libc/dns/gethoststxt.c b/libc/dns/gethoststxt.c index 129ef1f5..c4e2911c 100644 --- a/libc/dns/gethoststxt.c +++ b/libc/dns/gethoststxt.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/pushpop.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/dce.h" #include "libc/dns/hoststxt.h" #include "libc/fmt/fmt.h" diff --git a/libc/elf/elf.h b/libc/elf/elf.h index 88fa7fd7..e736dbcd 100644 --- a/libc/elf/elf.h +++ b/libc/elf/elf.h @@ -4,7 +4,7 @@ #include "libc/elf/struct/phdr.h" #include "libc/elf/struct/shdr.h" #include "libc/elf/struct/sym.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ diff --git a/libc/fmt/kerrnonames.S b/libc/fmt/kerrnonames.S index f4263722..1be51f56 100644 --- a/libc/fmt/kerrnonames.S +++ b/libc/fmt/kerrnonames.S @@ -22,8 
+22,6 @@ .source __FILE__ / Embeds ASCII names for errno constants into binary. -/ -/ @see libc/sysv/consts.sh .section .rodata,"aS",@progbits kErrnoNames: / diff --git a/libc/fmt/kmonthyearday.c b/libc/fmt/kmonthyearday.c index 79b13b34..b973aef0 100644 --- a/libc/fmt/kmonthyearday.c +++ b/libc/fmt/kmonthyearday.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/time/time.h" -const unsigned short kMonthYearDay[2][12] = { +hidden const unsigned short kMonthYearDay[2][12] = { {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}, {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335}, }; diff --git a/libc/fmt/palandprintf.c b/libc/fmt/palandprintf.c index da677674..674534ef 100644 --- a/libc/fmt/palandprintf.c +++ b/libc/fmt/palandprintf.c @@ -91,27 +91,27 @@ static int ppatoi(const char **str) { * * Precision Modifiers * - * - `%.8s` supplied byte length (obeys nul terminator) - * - `%.*s` supplied byte length argument (obeys nul terminator) - * - `%`.*s` supplied byte length argument c escaped (ignores nul terminator) - * - `%#.*s` supplied byte length argument visualized (ignores nul terminator) - * - `%.*hs` supplied char16_t length argument (obeys nul terminator) - * - `%.*ls` supplied wchar_t length argument (obeys nul terminator) + * - `%.8s` supplied byte length (obeys nul terminator) + * - `%.*s` supplied byte length argument (obeys nul terminator) + * - ``%`.*s`` supplied byte length argument c escaped (ignores nul term) + * - `%#.*s` supplied byte length argument visualized (ignores nul term) + * - `%.*hs` supplied char16_t length argument (obeys nul terminator) + * - `%.*ls` supplied wchar_t length argument (obeys nul terminator) * * Formatting Modifiers * - * - `%,d` thousands separators - * - `%'s` escaped c string literal - * - `%`c` c escaped character - * - `%`'c` c escaped character quoted - * - `%`s` c escaped string - * - `%`'s` c escaped string quoted - * - `%`s` escaped double 
quoted c string literal - * - `%`c` escaped double quoted c character literal - * - `%+d` plus leftpad if positive (aligns w/ negatives) - * - `% d` space leftpad if positive (aligns w/ negatives) - * - `%#s` datum (radix 256 null-terminated ibm cp437) - * - `%#x` int (radix 16 hexadecimal w/ 0x prefix if not zero) + * - `%,d` thousands separators + * - `%'s` escaped c string literal + * - ``%`c`` c escaped character + * - ``%`'c`` c escaped character quoted + * - ``%`s`` c escaped string + * - ``%`'s`` c escaped string quoted + * - ``%`s`` escaped double quoted c string literal + * - ``%`c`` escaped double quoted c character literal + * - `%+d` plus leftpad if positive (aligns w/ negatives) + * - `% d` space leftpad if positive (aligns w/ negatives) + * - `%#s` datum (radix 256 null-terminated ibm cp437) + * - `%#x` int (radix 16 hexadecimal w/ 0x prefix if not zero) * * @note implementation detail of printf(), snprintf(), etc. * @see printf() for wordier documentation diff --git a/libc/fmt/palandprintf.internal.h b/libc/fmt/palandprintf.internal.h index 44b47f0b..a549c5d4 100644 --- a/libc/fmt/palandprintf.internal.h +++ b/libc/fmt/palandprintf.internal.h @@ -7,12 +7,12 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -int spacepad(int(long, void *), void *, unsigned long) hidden; -int ftoa(int(long, void *), void *, long double, int, unsigned long, +int spacepad(int (*)(long, void *), void *, unsigned long) hidden; +int ftoa(int (*)(long, void *), void *, long double, int, unsigned long, unsigned long) hidden; -int stoa(int(long, void *), void *, void *, unsigned long, unsigned long, +int stoa(int (*)(long, void *), void *, void *, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char) hidden; -int ntoa(int(long, void *), void *, va_list, unsigned char, unsigned long, +int ntoa(int (*)(long, void *), void *, va_list, unsigned char, unsigned long, unsigned long, unsigned long, unsigned char, const char *) hidden; 
COSMOPOLITAN_C_END_ diff --git a/libc/fmt/strerror_r.c b/libc/fmt/strerror_r.c index 3ccd3973..739871b4 100644 --- a/libc/fmt/strerror_r.c +++ b/libc/fmt/strerror_r.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/fmt/fmt.h" diff --git a/libc/fmt/vsnprintf.c b/libc/fmt/vsnprintf.c index 4d8d844a..8f29f083 100644 --- a/libc/fmt/vsnprintf.c +++ b/libc/fmt/vsnprintf.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/dce.h" #include "libc/fmt/fmt.h" #include "libc/limits.h" diff --git a/libc/linux/mmap.h b/libc/linux/mmap.h index 024a72bd..a23fdfaf 100644 --- a/libc/linux/mmap.h +++ b/libc/linux/mmap.h @@ -5,14 +5,14 @@ forceinline long LinuxMmap(void *addr, size_t size, long prot, long flags, long fd, long off) { long rc; - register long r10 asm("r10") = flags; - register long r8 asm("r8") = fd; - register long r9 asm("r9") = off; - asm volatile("syscall" + asm volatile("mov\t%5,%%r10\n\t" + "mov\t%6,%%r8\n\t" + "mov\t%7,%%r9\n\t" + "syscall" : "=a"(rc) - : "0"(9), "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8), - "r"(r9) - : "rcx", "r11", "memory"); + : "0"(9), "D"(addr), "S"(size), "d"(prot), "g"(flags), "g"(fd), + "g"(off) + : "rcx", "r8", "r9", "r10", "r11", "memory"); return rc; } diff --git a/libc/log/asan.c b/libc/log/asan.c index c8ba2cb8..0f36d77e 100644 --- a/libc/log/asan.c +++ b/libc/log/asan.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ 
╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/fmt/itoa.h" @@ -44,13 +44,13 @@ STATIC_YOINK("_init_asan"); * so it can emit fast code, that checks the validity of each memory op * with byte granularity, by probing shadow memory. * - * AddressSanitizer dedicates one-eighth of the virtual address space + * - AddressSanitizer dedicates one-eighth of the virtual address space * to its shadow memory and uses a direct mapping with a scale and * offset to translate an application address to its corresponding * shadow address. Given the application memory address Addr, the * address of the shadow byte is computed as (Addr>>3)+Offset." * - * We use the following encoding for each shadow byte: 0 means that + * - We use the following encoding for each shadow byte: 0 means that * all 8 bytes of the corresponding application memory region are * addressable; k (1 ≤ k ≤ 7) means that the first k bytes are * addressible; any negative value indicates that the entire 8-byte @@ -60,11 +60,11 @@ STATIC_YOINK("_init_asan"); * * Here's what the generated code looks like for 64-bit reads: * - * movq %addr,%tmp - * shrq $3,%tmp - * cmpb $0,0x7fff8000(%tmp) - * jnz abort - * movq (%addr),%dst + * movq %addr,%tmp + * shrq $3,%tmp + * cmpb $0,0x7fff8000(%tmp) + * jnz abort + * movq (%addr),%dst */ #define HOOK(HOOK, IMPL) \ @@ -142,7 +142,6 @@ static const char *__asan_describe_access_poison(int c) { case kAsanUnscoped: return "unscoped"; default: - DebugBreak(); return "poisoned"; } } @@ -399,9 +398,8 @@ void __asan_map_shadow(void *p, size_t n) { b = ROUNDUP(SHADOW(ROUNDUP((uintptr_t)p + n, 8)), 1 << 16) >> 16; for (; a < b; ++a) { if (!__asan_is_mapped(a)) { - sm = DirectMap((void *)((uintptr_t)a << 16), 1 << 16, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + sm = 
__mmap((void *)((uintptr_t)a << 16), 1 << 16, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); if (sm.addr == MAP_FAILED || TrackMemoryInterval(&_mmi, a, a, sm.maphandle, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED) == -1) { @@ -412,7 +410,8 @@ void __asan_map_shadow(void *p, size_t n) { } static char *__asan_get_stack_base(void) { - register uintptr_t rsp asm("rsp"); + uintptr_t rsp; + asm("mov\t%%rsp,%0" : "=r"(rsp)); return (char *)ROUNDDOWN(ROUNDDOWN(rsp, STACKSIZE), FRAMESIZE); } diff --git a/libc/log/attachdebugger.c b/libc/log/attachdebugger.c index 8ab048c3..bed441e3 100644 --- a/libc/log/attachdebugger.c +++ b/libc/log/attachdebugger.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/fmt/fmt.h" diff --git a/libc/log/backtrace2.c b/libc/log/backtrace2.c index 5114cfab..471637e0 100644 --- a/libc/log/backtrace2.c +++ b/libc/log/backtrace2.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" #include "libc/alg/bisectcarleft.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" diff --git a/libc/log/cancolor.c b/libc/log/cancolor.c index bc697dcf..0daabe2e 100644 --- a/libc/log/cancolor.c +++ b/libc/log/cancolor.c @@ -41,15 +41,15 @@ * It's a common antipattern to check isatty(STDERR_FILENO), since that * usually makes colors harder to get than they are to remove: * - * sed 's/\x1b\[[;[:digit:]]*m//g' uncolor.txt + * sed 's/\x1b\[[;[:digit:]]*m//g' uncolor.txt * * Ideally, all software should be updated to understand 
color, since * it's been formally standardized nearly as long as ASCII. Even old * MS-DOS supports it (but Windows didn't until Windows 10) yet even * tools like less may need wrapper scripts, e.g.: * - * #!/bin/sh - * LESSCHARSET=UTF-8 exec /usr/bin/less -RS "$@" + * #!/bin/sh + * LESSCHARSET=UTF-8 exec /usr/bin/less -RS "$@" * * It's that easy fam. */ diff --git a/libc/log/checkfail.c b/libc/log/checkfail.c index 9e812ff6..a14597ac 100644 --- a/libc/log/checkfail.c +++ b/libc/log/checkfail.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/errno.h" diff --git a/libc/log/commandvenv.c b/libc/log/commandvenv.c index a5fd8f6e..538c91f7 100644 --- a/libc/log/commandvenv.c +++ b/libc/log/commandvenv.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/log/log.h" #include "libc/mem/mem.h" diff --git a/libc/log/die.c b/libc/log/die.c index 984b6bb3..c2a603b1 100644 --- a/libc/log/die.c +++ b/libc/log/die.c @@ -26,7 +26,9 @@ #include "libc/sysv/consts/fileno.h" /** - * Aborts process after printing details on its current state. + * Aborts process after printing a backtrace. + * + * If a debugger is present then this will trigger a breakpoint. 
*/ relegated wontreturn void __die(void) { static bool once; diff --git a/libc/log/gdb.h b/libc/log/gdb.h index 15d65b3e..09dcf87a 100644 --- a/libc/log/gdb.h +++ b/libc/log/gdb.h @@ -44,12 +44,12 @@ int attachdebugger(intptr_t); ({ \ int64_t WaAx; \ if (!IsWindows()) { \ - register void *Reg10 asm("r10") = (OPT_OUT_RUSAGE); \ - asm volatile("syscall" \ + asm volatile("mov\t%5,%%r10\n\t" \ + "syscall" \ : "=a"(WaAx) \ : "0"(__NR_wait4), "D"(PID), "S"(OPT_OUT_WSTATUS), \ - "d"(OPTIONS), "r"(Reg10) \ - : "rcx", "r11", "cc", "memory"); \ + "d"(OPTIONS), "g"(OPT_OUT_RUSAGE) \ + : "rcx", "r10", "r11", "cc", "memory"); \ } else { \ WaAx = wait4$nt(PID, OPT_OUT_WSTATUS, OPTIONS, OPT_OUT_RUSAGE); \ } \ diff --git a/libc/log/gdbexec.c b/libc/log/gdbexec.c index dcf49f99..575a22cc 100644 --- a/libc/log/gdbexec.c +++ b/libc/log/gdbexec.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/fmt/fmt.h" diff --git a/libc/log/gdbsync.c b/libc/log/gdbsync.c index 995b2a20..65098add 100644 --- a/libc/log/gdbsync.c +++ b/libc/log/gdbsync.c @@ -19,4 +19,4 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/log/gdb.h" -volatile int g_gdbsync; +volatile int g_gdbsync hidden; diff --git a/libc/log/getttysize.c b/libc/log/getttysize.c index b8c93c78..695d0458 100644 --- a/libc/log/getttysize.c +++ b/libc/log/getttysize.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/termios.h" #include 
"libc/fmt/conv.h" diff --git a/libc/log/isterminalinarticulate.c b/libc/log/isterminalinarticulate.c index 5bc869cb..5f68cdf7 100644 --- a/libc/log/isterminalinarticulate.c +++ b/libc/log/isterminalinarticulate.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/log/log.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" diff --git a/libc/log/oncrash.c b/libc/log/oncrash.c index 4bc735ab..998091ec 100644 --- a/libc/log/oncrash.c +++ b/libc/log/oncrash.c @@ -64,8 +64,8 @@ static const char kGodHatesFlags[12] forcealign(1) = "CVPRAKZSTIDO"; static const char kCrashSigNames[8][5] forcealign(1) = { "QUIT", "FPE", "ILL", "SEGV", "TRAP", "ABRT", "BUS"}; -int kCrashSigs[8]; -struct sigaction g_oldcrashacts[8]; +hidden int kCrashSigs[8]; +hidden struct sigaction g_oldcrashacts[8]; relegated static const char *TinyStrSignal(int sig) { size_t i; diff --git a/libc/math/fmax.c b/libc/math/fmax.c deleted file mode 100644 index ba954795..00000000 --- a/libc/math/fmax.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "libc/math/math.h" - -double fmax(double x, double y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - /* handle signed zeros, see C99 Annex F.9.9.2 */ - if (signbit(x) != signbit(y)) - return signbit(x) ? y : x; - return x < y ? y : x; -} diff --git a/libc/math/fmaxf.c b/libc/math/fmaxf.c deleted file mode 100644 index 3f52a7fe..00000000 --- a/libc/math/fmaxf.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "libc/math/math.h" - -float fmaxf(float x, float y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - /* handle signed zeroes, see C99 Annex F.9.9.2 */ - if (signbit(x) != signbit(y)) - return signbit(x) ? y : x; - return x < y ? 
y : x; -} diff --git a/libc/math/fmaxl.c b/libc/math/fmaxl.c deleted file mode 100644 index ee92ab65..00000000 --- a/libc/math/fmaxl.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "libc/math/math.h" - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -long double fmaxl(long double x, long double y) -{ - return fmax(x, y); -} -#else -long double fmaxl(long double x, long double y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - /* handle signed zeros, see C99 Annex F.9.9.2 */ - if (signbit(x) != signbit(y)) - return signbit(x) ? y : x; - return x < y ? y : x; -} -#endif diff --git a/libc/math/fmin.c b/libc/math/fmin.c deleted file mode 100644 index 2ddfe44d..00000000 --- a/libc/math/fmin.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "libc/math/math.h" - -double fmin(double x, double y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - /* handle signed zeros, see C99 Annex F.9.9.2 */ - if (signbit(x) != signbit(y)) - return signbit(x) ? x : y; - return x < y ? x : y; -} diff --git a/libc/math/fminf.c b/libc/math/fminf.c deleted file mode 100644 index 9aa5d017..00000000 --- a/libc/math/fminf.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "libc/math/math.h" - -float fminf(float x, float y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - /* handle signed zeros, see C99 Annex F.9.9.2 */ - if (signbit(x) != signbit(y)) - return signbit(x) ? x : y; - return x < y ? x : y; -} diff --git a/libc/math/fminl.c b/libc/math/fminl.c deleted file mode 100644 index 44e1eb0b..00000000 --- a/libc/math/fminl.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "libc/math/math.h" - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -long double fminl(long double x, long double y) -{ - return fmin(x, y); -} -#else -long double fminl(long double x, long double y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - /* handle signed zeros, see C99 Annex F.9.9.2 */ - if (signbit(x) != signbit(y)) - return signbit(x) ? x : y; - return x < y ? 
x : y; -} -#endif diff --git a/libc/mem/balloc.c b/libc/mem/balloc.c index aeb711ea..2c123e18 100644 --- a/libc/mem/balloc.c +++ b/libc/mem/balloc.c @@ -20,24 +20,19 @@ #include "libc/mem/mem.h" #include "libc/runtime/buffer.h" -/* TODO(jart): Delete */ - #define kGuard PAGESIZE #define kGrain FRAMESIZE /** * Allocates page-guarded buffer. * - * ┌─────────────────────────────────────┬s─i─g─s─e─g─v┐ - * │ 𝑣₀..𝑣ₙ₋₁ │𝑣ₙ..𝑣ₙ₊₄₀₉₆₋₁│ - * └─────────────────────────────────────┴s─i─g─s─e─g─v┘ - * * @param b is metadata object owned by caller, initialized to zero for * first call; subsequent calls will resize * @param a is alignment requirement in bytes, e.g. 1,2,4,8,16,... * @param n is buffer size in bytes * @return b->p * @see ralloc() + * @deprecated */ void *balloc(struct GuardedBuffer *b, unsigned a, size_t n) { return (b->p = memalign(a, n)); diff --git a/libc/mem/bfree.c b/libc/mem/bfree.c index 1e819253..2c003daf 100644 --- a/libc/mem/bfree.c +++ b/libc/mem/bfree.c @@ -22,8 +22,10 @@ #include "libc/mem/mem.h" #include "libc/runtime/buffer.h" -/* TODO(jart): Delete */ - +/** + * Frees memory returned by balloc(). + * @deprecated + */ void bfree(struct GuardedBuffer *b) { free(b->p); } diff --git a/libc/mem/cxx/free.S b/libc/mem/cxx/free.S index af97b83d..4295ffb2 100644 --- a/libc/mem/cxx/free.S +++ b/libc/mem/cxx/free.S @@ -22,9 +22,9 @@ / Frees memory the C++ way. / -/ @param %rdi is pointer, or NULL for no-op -/ @param %rsi is ignored -/ @param %rdx is ignored +/ \param %rdi is pointer, or NULL for no-op +/ \param %rsi is ignored +/ \param %rdx is ignored _ZdlPvSt11align_val_tRKSt9nothrow_t: / operator delete(void*, std::align_val_t, std::nothrow_t const&) nop diff --git a/libc/mem/cxx/malloc.S b/libc/mem/cxx/malloc.S index 318e1f11..6fdf55e7 100644 --- a/libc/mem/cxx/malloc.S +++ b/libc/mem/cxx/malloc.S @@ -22,9 +22,9 @@ / Allocates memory the C++ way. 
/ -/ @param %rdi is bytes to allocate -/ @param %rsi is ignored -/ @return new memory or NULL on OOM +/ \param %rdi is bytes to allocate +/ \param %rsi is ignored +/ \return new memory or NULL on OOM _ZnamRKSt9nothrow_t: / operator new[](unsigned long, std::nothrow_t const&) nop diff --git a/libc/mem/cxx/memalign.S b/libc/mem/cxx/memalign.S index 7b5ded02..cd193e6c 100644 --- a/libc/mem/cxx/memalign.S +++ b/libc/mem/cxx/memalign.S @@ -22,10 +22,10 @@ / Allocates aligned memory the C++ way. / -/ @param %rdi is bytes to allocate -/ @param %rsi is byte alignment -/ @param %rdx is ignored -/ @return new memory or NULL on OOM +/ \param %rdi is bytes to allocate +/ \param %rsi is byte alignment +/ \param %rdx is ignored +/ \return new memory or NULL on OOM _ZnamSt11align_val_tRKSt9nothrow_t: / operator new[](unsigned long, std::align_val_t, std::nothrow_t const&) nop diff --git a/libc/mem/internal.h b/libc/mem/internal.h index 7b538744..fa3f1f23 100644 --- a/libc/mem/internal.h +++ b/libc/mem/internal.h @@ -3,7 +3,7 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -int PutEnvImpl(char *string, bool overwrite) hidden; +int PutEnvImpl(char *, bool) hidden; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/mem/realloc.S b/libc/mem/realloc.S index be88cfca..1be72e79 100644 --- a/libc/mem/realloc.S +++ b/libc/mem/realloc.S @@ -36,12 +36,12 @@ / / Please note that p is NOT free()'d should realloc() fail, thus: / -/ if ((p2 = realloc(p, n2))) { -/ p = p2; -/ ... -/ } else { -/ ... -/ } +/ if ((p2 = realloc(p, n2))) { +/ p = p2; +/ ... +/ } else { +/ ... +/ } / / if n is for fewer bytes than already held by p, the newly unused / space is lopped off and freed if possible. diff --git a/libc/mem/wcsdup.c b/libc/mem/wcsdup.c index d714f009..e5ee85ca 100644 --- a/libc/mem/wcsdup.c +++ b/libc/mem/wcsdup.c @@ -20,6 +20,9 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" +/** + * Allocates copy of wide string. 
+ */ wchar_t *wcsdup(const wchar_t *s) { size_t len = wcslen(s); char *s2 = malloc(len * sizeof(wchar_t) + 1); diff --git a/libc/nexgen32e/bsf.c b/libc/nexgen32e/bsf.c index 01f19dce..8198c45b 100644 --- a/libc/nexgen32e/bsf.c +++ b/libc/nexgen32e/bsf.c @@ -19,6 +19,23 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/bsf.h" +/** + * Returns position of first bit set. + * + * ctz(𝑥) 31^clz(𝑥) clz(𝑥) + * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) + * 0x00000000 wut 32 0 wut 32 + * 0x00000001 0 0 1 0 31 + * 0x80000001 0 0 1 31 0 + * 0x80000000 31 31 32 31 0 + * 0x00000010 4 4 5 4 27 + * 0x08000010 4 4 5 27 4 + * 0x08000000 27 27 28 27 4 + * 0xffffffff 0 0 1 31 0 + * + * @param x is a 32-bit integer + * @return number in range 0..31 or undefined if 𝑥 is 0 + */ int(bsf)(int x) { return bsf(x); } diff --git a/libc/nexgen32e/bsfl.c b/libc/nexgen32e/bsfl.c index 2619b723..fa8e84ba 100644 --- a/libc/nexgen32e/bsfl.c +++ b/libc/nexgen32e/bsfl.c @@ -19,6 +19,23 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/bsf.h" +/** + * Returns position of first bit set. + * + * ctz(𝑥) 31^clz(𝑥) clz(𝑥) + * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) + * 0x00000000 wut 32 0 wut 32 + * 0x00000001 0 0 1 0 31 + * 0x80000001 0 0 1 31 0 + * 0x80000000 31 31 32 31 0 + * 0x00000010 4 4 5 4 27 + * 0x08000010 4 4 5 27 4 + * 0x08000000 27 27 28 27 4 + * 0xffffffff 0 0 1 31 0 + * + * @param 𝑥 is a 64-bit integer + * @return number in range 0..63 or undefined if 𝑥 is 0 + */ int(bsfl)(long x) { return bsfl(x); } diff --git a/libc/nexgen32e/bsfll.c b/libc/nexgen32e/bsfll.c index 10639e6f..ac596830 100644 --- a/libc/nexgen32e/bsfll.c +++ b/libc/nexgen32e/bsfll.c @@ -19,6 +19,23 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/bsf.h" +/** + * Returns position of first bit set. 
+ * + * ctz(𝑥) 31^clz(𝑥) clz(𝑥) + * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) + * 0x00000000 wut 32 0 wut 32 + * 0x00000001 0 0 1 0 31 + * 0x80000001 0 0 1 31 0 + * 0x80000000 31 31 32 31 0 + * 0x00000010 4 4 5 4 27 + * 0x08000010 4 4 5 27 4 + * 0x08000000 27 27 28 27 4 + * 0xffffffff 0 0 1 31 0 + * + * @param 𝑥 is a 64-bit integer + * @return number in range 0..63 or undefined if 𝑥 is 0 + */ int(bsfll)(long long x) { return bsfll(x); } diff --git a/libc/nexgen32e/bsr.c b/libc/nexgen32e/bsr.c index 10c12e7e..cdb5535d 100644 --- a/libc/nexgen32e/bsr.c +++ b/libc/nexgen32e/bsr.c @@ -19,6 +19,23 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/bsr.h" +/** + * Returns binary logarithm of 𝑥. + * + * ctz(𝑥) 31^clz(𝑥) clz(𝑥) + * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) + * 0x00000000 wut 32 0 wut 32 + * 0x00000001 0 0 1 0 31 + * 0x80000001 0 0 1 31 0 + * 0x80000000 31 31 32 31 0 + * 0x00000010 4 4 5 4 27 + * 0x08000010 4 4 5 27 4 + * 0x08000000 27 27 28 27 4 + * 0xffffffff 0 0 1 31 0 + * + * @param x is a 32-bit integer + * @return number in range 0..31 or undefined if 𝑥 is 0 + */ int(bsr)(int x) { return bsr(x); } diff --git a/libc/nexgen32e/bsrl.c b/libc/nexgen32e/bsrl.c index 56e60d66..4d671624 100644 --- a/libc/nexgen32e/bsrl.c +++ b/libc/nexgen32e/bsrl.c @@ -19,6 +19,23 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/bsr.h" +/** + * Returns binary logarithm of 𝑥. 
+ * + * ctz(𝑥) 31^clz(𝑥) clz(𝑥) + * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) + * 0x00000000 wut 32 0 wut 32 + * 0x00000001 0 0 1 0 31 + * 0x80000001 0 0 1 31 0 + * 0x80000000 31 31 32 31 0 + * 0x00000010 4 4 5 4 27 + * 0x08000010 4 4 5 27 4 + * 0x08000000 27 27 28 27 4 + * 0xffffffff 0 0 1 31 0 + * + * @param x is a 64-bit integer + * @return number in range 0..63 or undefined if 𝑥 is 0 + */ int(bsrl)(long x) { return bsrl(x); } diff --git a/libc/nexgen32e/bsrll.c b/libc/nexgen32e/bsrll.c index 46e08f41..4627bd7d 100644 --- a/libc/nexgen32e/bsrll.c +++ b/libc/nexgen32e/bsrll.c @@ -19,6 +19,23 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/bsr.h" +/** + * Returns binary logarithm of 𝑥. + * + * ctz(𝑥) 31^clz(𝑥) clz(𝑥) + * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) + * 0x00000000 wut 32 0 wut 32 + * 0x00000001 0 0 1 0 31 + * 0x80000001 0 0 1 31 0 + * 0x80000000 31 31 32 31 0 + * 0x00000010 4 4 5 4 27 + * 0x08000010 4 4 5 27 4 + * 0x08000000 27 27 28 27 4 + * 0xffffffff 0 0 1 31 0 + * + * @param x is a 64-bit integer + * @return number in range 0..63 or undefined if 𝑥 is 0 + */ int(bsrll)(long long x) { return bsrll(x); } diff --git a/libc/nexgen32e/bsrmax.S b/libc/nexgen32e/bsrmax.S index d34bbeaf..be493e34 100644 --- a/libc/nexgen32e/bsrmax.S +++ b/libc/nexgen32e/bsrmax.S @@ -21,20 +21,19 @@ / Returns binary logarithm of integer 𝑥. 
/ +/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) +/ 0x00000000 wut 32 0 wut 32 +/ 0x00000001 0 0 1 0 31 +/ 0x80000001 0 0 1 31 0 +/ 0x80000000 31 31 32 31 0 +/ 0x00000010 4 4 5 4 27 +/ 0x08000010 4 4 5 27 4 +/ 0x08000000 27 27 28 27 4 +/ 0xffffffff 0 0 1 31 0 +/ / @param rsi:rdi is 128-bit unsigned 𝑥 value / @return eax number in range [0,128) or undef if 𝑥 is 0 / @see also treasure trove of nearly identical functions -/ -/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) -/ 0x00000000 wut 32 0 wut 32 -/ 0x00000001 0 0 1 0 31 -/ 0x80000001 0 0 1 31 0 -/ 0x80000000 31 31 32 31 0 -/ 0x00000010 4 4 5 4 27 -/ 0x08000010 4 4 5 27 4 -/ 0x08000000 27 27 28 27 4 -/ 0xffffffff 0 0 1 31 0 -/ bsrmax: .leafprologue .profilable bsr %rsi,%rax diff --git a/libc/nexgen32e/crc32c-pure.c b/libc/nexgen32e/crc32c-pure.c index 5bbd9add..a97bf89c 100644 --- a/libc/nexgen32e/crc32c-pure.c +++ b/libc/nexgen32e/crc32c-pure.c @@ -19,11 +19,12 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/crc32.h" +extern const uint32_t kCrc32cTab[256]; + /** * Computes Castagnoli CRC-32 on old computers. */ uint32_t crc32c$pure(uint32_t init, const void *data, size_t size) { - extern const uint32_t kCrc32cTab[256]; const unsigned char *p = data; uint32_t h = init ^ 0xffffffff; unsigned i; diff --git a/libc/nexgen32e/crc32init.S b/libc/nexgen32e/crc32init.S index 6e2a9701..463ca598 100644 --- a/libc/nexgen32e/crc32init.S +++ b/libc/nexgen32e/crc32init.S @@ -21,16 +21,16 @@ / Generates lookup table for computing CRC-32 byte-by-byte. / -/ void crc32init(uint32_t table[256], uint32_t polynomial) { -/ uint32_t d, i, r; -/ for (d = 0; d < 256; ++d) { -/ r = d; -/ for (i = 0; i < 8; ++i) { -/ r = r >> 1 ^ (r & 1 ? polynomial : 0); +/ void crc32init(uint32_t table[256], uint32_t polynomial) { +/ uint32_t d, i, r; +/ for (d = 0; d < 256; ++d) { +/ r = d; +/ for (i = 0; i < 8; ++i) { +/ r = r >> 1 ^ (r & 1 ? 
polynomial : 0); +/ } +/ table[d] = r; / } -/ table[d] = r; / } -/ } / / @param rdi is pointer to uint32_t[256] array / @param esi 32-bit binary polynomial config diff --git a/libc/nexgen32e/crc32z.c b/libc/nexgen32e/crc32z.c index c0ed7dd6..d6e62b63 100644 --- a/libc/nexgen32e/crc32z.c +++ b/libc/nexgen32e/crc32z.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/nexgen32e/crc32.h" #include "libc/nexgen32e/x86feature.h" diff --git a/libc/nexgen32e/ffs.S b/libc/nexgen32e/ffs.S index 1391f16d..4559d097 100644 --- a/libc/nexgen32e/ffs.S +++ b/libc/nexgen32e/ffs.S @@ -21,10 +21,6 @@ / Finds lowest set bit in word. / -/ @param edi is the input number -/ @return number in range [1,32] or 0 if no bits set -/ @see also treasure trove of nearly identical functions -/ / uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) / 0x00000000 wut 32 0 wut 32 / 0x00000001 0 0 1 0 31 @@ -35,6 +31,9 @@ / 0x08000000 27 27 28 27 4 / 0xffffffff 0 0 1 31 0 / +/ @param edi is the input number +/ @return number in range [1,32] or 0 if no bits set +/ @see also treasure trove of nearly identical functions / @asyncsignalsafe ffs: .leafprologue .profilable diff --git a/libc/nexgen32e/ffsl.S b/libc/nexgen32e/ffsl.S index 84a2540e..9833af19 100644 --- a/libc/nexgen32e/ffsl.S +++ b/libc/nexgen32e/ffsl.S @@ -21,10 +21,6 @@ / Finds lowest set bit in word. 
/ -/ @param rdi is the input number -/ @return number in range [1,64] or 0 if no bits set -/ @see also treasure trove of nearly identical functions -/ / uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) / 0x00000000 wut 32 0 wut 32 / 0x00000001 0 0 1 0 31 @@ -35,6 +31,9 @@ / 0x08000000 27 27 28 27 4 / 0xffffffff 0 0 1 31 0 / +/ @param rdi is the input number +/ @return number in range [1,64] or 0 if no bits set +/ @see also treasure trove of nearly identical functions / @asyncsignalsafe ffsl: .leafprologue .profilable diff --git a/libc/nexgen32e/kcp437.S b/libc/nexgen32e/kcp437.S index f76a2662..a15e4c85 100644 --- a/libc/nexgen32e/kcp437.S +++ b/libc/nexgen32e/kcp437.S @@ -24,38 +24,38 @@ / ibm cp437 unicode table w/ string literal safety / -/ ░▄██▒▄█ ▐██ ░░░ ▀▀████▒▀█▄ -/ ▐███▓██░ ██▌ ▀████▄■█▄ -/ ▐█▓███▀█░██▀ ░ ░▀█████▓▄ -/ ▐█▓██▀▄█▒██▀ ▄▄░ ▄▄▄ ░░░ ░▀████▒▄ -/ ▐████▀▄█■█▀ ▀▀ ░█████░ -/ ▐█▓█▀████▀ ░ ▐▓███▒ -/ █░███▀▀ ░░░ ▄█ ░░░ █████ -/ ▐█▓█░▀▀ ░░▄█▄▄▄▄▄ ▀▄ ▌▄▄▄░▄▄▄▄▄ ▐████░ -/ ▐███▌ ▄▀█████████▄ ▌▐▄████████▄ ▐▓███░ -/ ▐███░░░▀▄█▀▄▄████▄▀░ ▐████████▒ ▀ ░███░ -/ ░████░ ▓▀ ▄███████▀▌ ▀▄■████▀▀█▀ ██▀█ -/ ▓███░ ░▄▀▀░░░ ▀ ░░▌ ▄▀▀▄░░▀░▄▀▄ ▐██▀▄ -/ ░███░ ▄▓▓▄▄░▀▀█▀█ ▌░░ ▀█▀█▀▀ ▐██▀ -/ █▀▄▐██ ▀░░ ▄▀ ▐ █ ▀ ▄▄▄░ ░▀▄█▄▀█ -/ ▌▄ █▓ ▒ ░ █▄█▄▀▄▄▄███▄▀▄ ░░ ░ ▀ █▌ -/ █▌▄░▌ ░░░▄▀█▀███████▄▀▄▀▄▀▀▄▄▄ █▀█░▐ -/ ██▄ ░░░▄█▄▀██▄█■██████▄█▄█▄■▀█░ ▐░▐ -/ ▀██░ ░▄██████████████████▄█▄█ ░█ ░ ▄▀ -/ ▀▓█▄▓░░ ▒█▀█████████████████████▒ ██▀ -/ ▀███ ▓▒ ██████████████▀▀▀▀█▄▀ ░▄█▒ -/ ▀███ ▀█▄▀▄█████▀▀ ▓▓▓▄░ ▐ ░▄██ -/ ▀██ ▄███████▄████████▀░░ ░▄██ -/ ▄██▀▀▄ █▄▀▄██▒▒███████████▀▀▀▄░ ░███░ -/ ▄██▀▄▄░░▀▐▄████▄ █████▀▄░░█▀▄▀░░ ▄██░ -/ █████▄▄▄███▀░█▌██▄▀▀█████▄▄░░░▄▄███▀██▄ ▄▀▀▀▄▄ -/ ▀██████▀■▄█▄▄ ░▀███████████████▓▓░░▄██▀▄████▄▄▀▄ -/ -/ █▀█ █ █▀█ █▀█ █▄▀ ▐▀█▀▌█▀█ █▀█ █▄ █ ▀█▀ █▀█ █▀▀ -/ █▀▄ █ █ █ █ █ ▀▄ █ █▀▄ █ █ █ ▀█ █ █ ▀▀█ -/ █▄█ █▄▌█▄█ █▄█ █ █ █ █ █ █▄█ █ █ ▄█▄ █▄█ █▄█ -/ -/ THERE WILL BE BLOCKS march 01 2017 +/ ░▄██▒▄█ ▐██ ░░░ ▀▀████▒▀█▄ +/ ▐███▓██░ ██▌ ▀████▄■█▄ +/ ▐█▓███▀█░██▀ ░ ░▀█████▓▄ +/ 
▐█▓██▀▄█▒██▀ ▄▄░ ▄▄▄ ░░░ ░▀████▒▄ +/ ▐████▀▄█■█▀ ▀▀ ░█████░ +/ ▐█▓█▀████▀ ░ ▐▓███▒ +/ █░███▀▀ ░░░ ▄█ ░░░ █████ +/ ▐█▓█░▀▀ ░░▄█▄▄▄▄▄ ▀▄ ▌▄▄▄░▄▄▄▄▄ ▐████░ +/ ▐███▌ ▄▀█████████▄ ▌▐▄████████▄ ▐▓███░ +/ ▐███░░░▀▄█▀▄▄████▄▀░ ▐████████▒ ▀ ░███░ +/ ░████░ ▓▀ ▄███████▀▌ ▀▄■████▀▀█▀ ██▀█ +/ ▓███░ ░▄▀▀░░░ ▀ ░░▌ ▄▀▀▄░░▀░▄▀▄ ▐██▀▄ +/ ░███░ ▄▓▓▄▄░▀▀█▀█ ▌░░ ▀█▀█▀▀ ▐██▀ +/ █▀▄▐██ ▀░░ ▄▀ ▐ █ ▀ ▄▄▄░ ░▀▄█▄▀█ +/ ▌▄ █▓ ▒ ░ █▄█▄▀▄▄▄███▄▀▄ ░░ ░ ▀ █▌ +/ █▌▄░▌ ░░░▄▀█▀███████▄▀▄▀▄▀▀▄▄▄ █▀█░▐ +/ ██▄ ░░░▄█▄▀██▄█■██████▄█▄█▄■▀█░ ▐░▐ +/ ▀██░ ░▄██████████████████▄█▄█ ░█ ░ ▄▀ +/ ▀▓█▄▓░░ ▒█▀█████████████████████▒ ██▀ +/ ▀███ ▓▒ ██████████████▀▀▀▀█▄▀ ░▄█▒ +/ ▀███ ▀█▄▀▄█████▀▀ ▓▓▓▄░ ▐ ░▄██ +/ ▀██ ▄███████▄████████▀░░ ░▄██ +/ ▄██▀▀▄ █▄▀▄██▒▒███████████▀▀▀▄░ ░███░ +/ ▄██▀▄▄░░▀▐▄████▄ █████▀▄░░█▀▄▀░░ ▄██░ +/ █████▄▄▄███▀░█▌██▄▀▀█████▄▄░░░▄▄███▀██▄ ▄▀▀▀▄▄ +/ ▀██████▀■▄█▄▄ ░▀███████████████▓▓░░▄██▀▄████▄▄▀▄ +/ +/ █▀█ █ █▀█ █▀█ █▄▀ ▐▀█▀▌█▀█ █▀█ █▄ █ ▀█▀ █▀█ █▀▀ +/ █▀▄ █ █ █ █ █ ▀▄ █ █▀▄ █ █ █ ▀█ █ █ ▀▀█ +/ █▄█ █▄▌█▄█ █▄█ █ █ █ █ █ █▄█ █ █ ▄█▄ █▄█ █▄█ +/ +/ THERE WILL BE BLOCKS march 01 2017 / / @see libc/str/str.h / @see kCp437i[] diff --git a/libc/nexgen32e/ksigsetempty.S b/libc/nexgen32e/ksigsetempty.S index 96e10705..6a8a595e 100644 --- a/libc/nexgen32e/ksigsetempty.S +++ b/libc/nexgen32e/ksigsetempty.S @@ -20,8 +20,6 @@ #include "libc/macros.h" / Signal mask constant w/ no signal bits set. -/ -/ @see kSigsetFull,sigemptyset(),sigprocmask(),sigaction() .initbss 300,_init_kSigsetEmpty kSigsetEmpty: .rept NSIG / 64 diff --git a/libc/nexgen32e/ksigsetfull.S b/libc/nexgen32e/ksigsetfull.S index f0e25f1c..0c9dbcac 100644 --- a/libc/nexgen32e/ksigsetfull.S +++ b/libc/nexgen32e/ksigsetfull.S @@ -20,8 +20,6 @@ #include "libc/macros.h" / Signal mask constant w/ every signal bit set. 
-/ -/ @see kSigsetEmpty,sigemptyset(),sigprocmask(),sigaction() .initbss 300,_init_kSigsetFull kSigsetFull: .rept NSIG / 64 diff --git a/libc/nexgen32e/kstarttsc.S b/libc/nexgen32e/kstarttsc.S index 9f57da66..42120af9 100644 --- a/libc/nexgen32e/kstarttsc.S +++ b/libc/nexgen32e/kstarttsc.S @@ -20,7 +20,9 @@ #include "libc/macros.h" / Stores CPU Timestamp Counter at startup. +/ / It can be useful as an added source of seeding information. +/ / @note rdtsc is a 25 cycle instruction .initbss 200,_init_kStartTsc kStartTsc: diff --git a/libc/nexgen32e/lzcnt.S b/libc/nexgen32e/lzcnt.S index 2ce17e27..305b5192 100644 --- a/libc/nexgen32e/lzcnt.S +++ b/libc/nexgen32e/lzcnt.S @@ -21,10 +21,6 @@ / Finds leading bits in 𝑥. / -/ @param edi is 32-bit unsigned 𝑥 value -/ @return eax number in range [0,32) or 32 if 𝑥 is 0 -/ @see also treasure trove of nearly identical functions -/ / uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) / 0x00000000 wut 32 0 wut 32 / 0x00000001 0 0 1 0 31 @@ -35,6 +31,9 @@ / 0x08000000 27 27 28 27 4 / 0xffffffff 0 0 1 31 0 / +/ @param edi is 32-bit unsigned 𝑥 value +/ @return eax number in range [0,32) or 32 if 𝑥 is 0 +/ @see also treasure trove of nearly identical functions lzcnt: .leafprologue .profilable mov $31,%eax diff --git a/libc/nexgen32e/lzcntl.S b/libc/nexgen32e/lzcntl.S index 519c3645..8d8c1975 100644 --- a/libc/nexgen32e/lzcntl.S +++ b/libc/nexgen32e/lzcntl.S @@ -21,10 +21,6 @@ / Finds leading bits in 𝑥. 
/ -/ @param rdi is 64-bit unsigned 𝑥 value -/ @return rax number in range [0,64) or 64 if 𝑥 is 0 -/ @see also treasure trove of nearly identical functions -/ / uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) / 0x00000000 wut 32 0 wut 32 / 0x00000001 0 0 1 0 31 @@ -35,6 +31,9 @@ / 0x08000000 27 27 28 27 4 / 0xffffffff 0 0 1 31 0 / +/ @param rdi is 64-bit unsigned 𝑥 value +/ @return rax number in range [0,64) or 64 if 𝑥 is 0 +/ @see also treasure trove of nearly identical functions lzcntl: .leafprologue .profilable mov $31,%eax diff --git a/libc/nexgen32e/strstr-sse42.S b/libc/nexgen32e/strstr-sse42.S index 4750de8f..0a682dae 100644 --- a/libc/nexgen32e/strstr-sse42.S +++ b/libc/nexgen32e/strstr-sse42.S @@ -21,12 +21,7 @@ #include "libc/nexgen32e/pcmpstr.inc" #include "libc/nexgen32e/strstr.inc" -/ Searches for substring. -/ -/ @param rdi is NUL-terminated haystack string -/ @param rsi is NUL-terminated needle string (16-byte aligned) -/ @return rax is pointer to substring or NULL -/ @asyncsignalsafe +/ TODO(jart): Fix me. strstr$sse42: .leafprologue mov %rdi,%rax diff --git a/libc/nexgen32e/strstr16-sse42.S b/libc/nexgen32e/strstr16-sse42.S index 39a624c9..9f546696 100644 --- a/libc/nexgen32e/strstr16-sse42.S +++ b/libc/nexgen32e/strstr16-sse42.S @@ -21,12 +21,7 @@ #include "libc/nexgen32e/pcmpstr.inc" #include "libc/nexgen32e/strstr.inc" -/ Searches for substring w/ char16_t. -/ -/ @param rdi is NUL-terminated haystack string -/ @param rsi is NUL-terminated needle string (16-byte aligned) -/ @return rax is pointer to substring or NULL -/ @asyncsignalsafe +/ TODO(jart): Fix me. strstr16$sse42: .strstr .Lequalorder16 .endfn strstr16$sse42,globl,hidden diff --git a/libc/nexgen32e/tzcnt.S b/libc/nexgen32e/tzcnt.S index f9aa7c48..5d5269ff 100644 --- a/libc/nexgen32e/tzcnt.S +++ b/libc/nexgen32e/tzcnt.S @@ -22,10 +22,6 @@ / Finds lowest set bit in 𝑥. 
/ -/ @param edi is 32-bit unsigned 𝑥 value -/ @return eax number in range [0,32) or 32 if 𝑥 is 0 -/ @see also treasure trove of nearly identical functions -/ / uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) / 0x00000000 wut 32 0 wut 32 / 0x00000001 0 0 1 0 31 @@ -36,6 +32,9 @@ / 0x08000000 27 27 28 27 4 / 0xffffffff 0 0 1 31 0 / +/ @param edi is 32-bit unsigned 𝑥 value +/ @return eax number in range [0,32) or 32 if 𝑥 is 0 +/ @see also treasure trove of nearly identical functions tzcnt: .leafprologue .profilable mov $32,%esi diff --git a/libc/nexgen32e/tzcntl.S b/libc/nexgen32e/tzcntl.S index 91c9c376..9a289809 100644 --- a/libc/nexgen32e/tzcntl.S +++ b/libc/nexgen32e/tzcntl.S @@ -21,10 +21,6 @@ / Finds lowest set bit in 𝑥. / -/ @param rdi is 64-bit unsigned 𝑥 value -/ @return rax number in range [0,64) or 64 if 𝑥 is 0 -/ @see also treasure trove of nearly identical functions -/ / uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) / 0x00000000 wut 32 0 wut 32 / 0x00000001 0 0 1 0 31 @@ -35,6 +31,9 @@ / 0x08000000 27 27 28 27 4 / 0xffffffff 0 0 1 31 0 / +/ @param rdi is 64-bit unsigned 𝑥 value +/ @return rax number in range [0,64) or 64 if 𝑥 is 0 +/ @see also treasure trove of nearly identical functions tzcntl: .leafprologue .profilable mov $64,%esi diff --git a/libc/nt/nt/loader.h b/libc/nt/nt/loader.h index 7d77a1e2..2347fe04 100644 --- a/libc/nt/nt/loader.h +++ b/libc/nt/nt/loader.h @@ -37,8 +37,6 @@ struct NtAnsiString; struct NtLdrDataTableEntry; struct NtUnicodeString; -const struct NtLdrDataTableEntry *NtGetModule(const char *basename) nothrow; - NtStatus LdrLoadDll(const char16_t *opt_PathToFile, uint32_t *opt_Flags, struct NtUnicodeString *ModuleFileName, void **out_ModuleHandle); diff --git a/libc/rand/g_rando32.c b/libc/rand/g_rando32.c index 422a816e..399f3595 100644 --- a/libc/rand/g_rando32.c +++ b/libc/rand/g_rando32.c @@ -26,11 +26,11 @@ hidden uint32_t g_rando32; textstartup static void g_rando32_init() { - register intptr_t *auxv asm("r15"); /* @see 
libc/crt/crt.S */ - asm volatile("" : "=r"(auxv)); + intptr_t *auxvp; if (!IsXnu() && !IsWindows()) { - for (intptr_t *auxvp = auxv; auxvp[0]; auxvp += 2) { - if (auxvp[0] == AT_RANDOM) { + asm("mov\t%%r15,%0" : "=r"(auxvp)); /* libc/crt/crt.S */ + for (; *auxvp; auxvp += 2) { + if (*auxvp == AT_RANDOM) { uint8_t(*sysrandseed)[16] = (uint8_t(*)[16])auxvp[1]; if (sysrandseed) g_rando32 ^= read32le(&(*sysrandseed)[8]); return; diff --git a/libc/rand/g_rando64.c b/libc/rand/g_rando64.c index 0fd88bf7..cc6d3e5c 100644 --- a/libc/rand/g_rando64.c +++ b/libc/rand/g_rando64.c @@ -26,10 +26,10 @@ hidden uint64_t g_rando64; textstartup static void g_rando64_init() { - register intptr_t *auxv asm("r15"); /* @see libc/crt/crt.S */ - asm volatile("" : "=r"(auxv)); + intptr_t *auxvp; if (!IsXnu() && !IsWindows()) { - for (intptr_t *auxvp = auxv; auxvp[0]; auxvp += 2) { + asm("mov\t%%r15,%0" : "=r"(auxvp)); /* libc/crt/crt.S */ + for (; auxvp[0]; auxvp += 2) { if (auxvp[0] == AT_RANDOM) { uint8_t(*sysrandseed)[16] = (uint8_t(*)[16])auxvp[1]; if (sysrandseed) g_rando64 ^= read64le(&(*sysrandseed)[0]); diff --git a/libc/rand/getrandom.c b/libc/rand/getrandom.c index 873b3820..57ab998e 100644 --- a/libc/rand/getrandom.c +++ b/libc/rand/getrandom.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/errno.h" #include "libc/rand/rand.h" diff --git a/libc/rand/rand32.c b/libc/rand/rand32.c index c072f575..16d8a8e0 100644 --- a/libc/rand/rand32.c +++ b/libc/rand/rand32.c @@ -22,6 +22,8 @@ #include "libc/rand/rand.h" #include "libc/rand/xorshift.h" +hidden extern uint32_t g_rando32; + /** * This function is an independent 32-bit clone of rand64(). 
*/ @@ -35,7 +37,6 @@ nodebuginfo uint32_t(rand32)(void) { } else { devrand(&res, sizeof(res)); } - extern uint32_t g_rando32 hidden; res ^= MarsagliaXorshift32(&g_rando32); } return res; diff --git a/libc/rand/rand64.c b/libc/rand/rand64.c index 52a04275..a2471e4a 100644 --- a/libc/rand/rand64.c +++ b/libc/rand/rand64.c @@ -22,6 +22,8 @@ #include "libc/rand/rand.h" #include "libc/rand/xorshift.h" +hidden extern uint64_t g_rando64; + /** * Returns nondeterministic random number. * @@ -42,7 +44,6 @@ nodebuginfo uint64_t(rand64)(void) { } else { devrand(&res, sizeof(res)); } - hidden extern uint64_t g_rando64; res ^= MarsagliaXorshift64(&g_rando64); } return res; diff --git a/libc/runtime/closesymboltable.c b/libc/runtime/closesymboltable.c index a3be5623..68672239 100644 --- a/libc/runtime/closesymboltable.c +++ b/libc/runtime/closesymboltable.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" diff --git a/libc/runtime/directmap.c b/libc/runtime/directmap.c index c338e666..ec69a159 100644 --- a/libc/runtime/directmap.c +++ b/libc/runtime/directmap.c @@ -21,12 +21,20 @@ #include "libc/nt/runtime.h" #include "libc/runtime/directmap.h" -struct DirectMap DirectMap(void *addr, size_t size, unsigned prot, - unsigned flags, int fd, int64_t off) { +/** + * Obtains memory mapping directly from system. + * + * The mmap() function needs to track memory mappings in order to + * support Windows NT and Address Sanitizer. That memory tracking can be + * bypassed by calling this function. However the caller is responsible + * for passing the magic memory handle on Windows NT to CloseHandle(). 
+ */ +struct DirectMap __mmap(void *addr, size_t size, unsigned prot, unsigned flags, + int fd, int64_t off) { if (!IsWindows()) { return (struct DirectMap){mmap$sysv(addr, size, prot, flags, fd, off), kNtInvalidHandleValue}; } else { - return DirectMapNt(addr, size, prot, flags, fd, off); + return __mmap$nt(addr, size, prot, flags, fd, off); } } diff --git a/libc/runtime/directmap.h b/libc/runtime/directmap.h index 903aacbb..ee3d3dba 100644 --- a/libc/runtime/directmap.h +++ b/libc/runtime/directmap.h @@ -8,8 +8,8 @@ struct DirectMap { int64_t maphandle; }; -struct DirectMap DirectMap(void *, size_t, unsigned, unsigned, int, int64_t); -struct DirectMap DirectMapNt(void *, size_t, unsigned, unsigned, int, int64_t); +struct DirectMap __mmap(void *, size_t, unsigned, unsigned, int, int64_t); +struct DirectMap __mmap$nt(void *, size_t, unsigned, unsigned, int, int64_t); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/runtime/directmapnt.c b/libc/runtime/directmapnt.c index 141ecaa1..4684dd21 100644 --- a/libc/runtime/directmapnt.c +++ b/libc/runtime/directmapnt.c @@ -23,8 +23,8 @@ #include "libc/nt/runtime.h" #include "libc/runtime/directmap.h" -textwindows struct DirectMap DirectMapNt(void *addr, size_t size, unsigned prot, - unsigned flags, int fd, int64_t off) { +textwindows struct DirectMap __mmap$nt(void *addr, size_t size, unsigned prot, + unsigned flags, int fd, int64_t off) { int64_t handle; struct DirectMap res; if (fd != -1) { diff --git a/libc/runtime/ezmap.c b/libc/runtime/ezmap.c index e31aad86..812a82af 100644 --- a/libc/runtime/ezmap.c +++ b/libc/runtime/ezmap.c @@ -17,20 +17,16 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/limits.h" -#include "libc/runtime/ezmap.h" 
+#include "libc/runtime/ezmap.internal.h" #include "libc/runtime/runtime.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" -/** - * Memory-maps file for reading. - * An internal veneer for a common mmap() use-case. - */ -int MapFileRead(const char *filename, struct MappedFile *mf) { +hidden int MapFileRead(const char *filename, struct MappedFile *mf) { mf->addr = MAP_FAILED; if ((mf->fd = open(filename, O_RDONLY)) != -1 && (mf->size = getfiledescriptorsize(mf->fd)) < INT_MAX && @@ -44,10 +40,7 @@ int MapFileRead(const char *filename, struct MappedFile *mf) { } } -/** - * Releases resource returned by MapFileRead(). - */ -int UnmapFile(struct MappedFile *mf) { +hidden int UnmapFile(struct MappedFile *mf) { int rc; rc = 0; if (mf->addr && mf->addr != MAP_FAILED) { diff --git a/libc/runtime/ezmap.h b/libc/runtime/ezmap.internal.h similarity index 64% rename from libc/runtime/ezmap.h rename to libc/runtime/ezmap.internal.h index 77a9757a..5e89f1c2 100644 --- a/libc/runtime/ezmap.h +++ b/libc/runtime/ezmap.internal.h @@ -1,5 +1,5 @@ -#ifndef COSMOPOLITAN_LIBC_EZMAP_H_ -#define COSMOPOLITAN_LIBC_EZMAP_H_ +#ifndef COSMOPOLITAN_LIBC_RUNTIME_EZMAP_INTERNAL_H_ +#define COSMOPOLITAN_LIBC_RUNTIME_EZMAP_INTERNAL_H_ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ @@ -14,4 +14,4 @@ int UnmapFile(struct MappedFile *) hidden; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_LIBC_EZMAP_H_ */ +#endif /* COSMOPOLITAN_LIBC_RUNTIME_EZMAP_INTERNAL_H_ */ diff --git a/libc/runtime/ftrace.greg.c b/libc/runtime/ftrace.greg.c index b0d7c853..f6987135 100644 --- a/libc/runtime/ftrace.greg.c +++ b/libc/runtime/ftrace.greg.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/bisectcarleft.internal.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include 
"libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/sigset.h" @@ -100,15 +100,14 @@ privileged interruptfn void ftrace_hook(void) { } /** - * Enables plaintext function tracing if --ftrace flag passed. + * Enables plaintext function tracing if `--ftrace` flag is passed. * - * The --ftrace CLI arg is removed before main() is called. This - * code is intended for diagnostic purposes and assumes binaries - * are trustworthy and stack isn't corrupted. Logging plain text - * allows program structure to easily be visualized and hotspots - * identified w/ sed | sort | uniq -c | sort. A compressed trace - * can be made by appending --ftrace 2>&1 | gzip -4 >trace.gz to - * the CLI arguments. Have fun. + * The `--ftrace` CLI arg is removed before main() is called. This code + * is intended for diagnostic purposes and assumes binaries are + * trustworthy and stack isn't corrupted. Logging plain text allows + * program structure to easily be visualized and hotspots identified w/ + * `sed | sort | uniq -c | sort`. A compressed trace can be made by + * appending `--ftrace 2>&1 | gzip -4 >trace.gz` to the CLI arguments. 
* * @see libc/runtime/_init.S for documentation */ diff --git a/libc/runtime/getdosargv.c b/libc/runtime/getdosargv.c index 7890343c..fae52134 100644 --- a/libc/runtime/getdosargv.c +++ b/libc/runtime/getdosargv.c @@ -20,7 +20,7 @@ #include "libc/assert.h" #include "libc/bits/bits.h" #include "libc/bits/pushpop.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/runtime/internal.h" #include "libc/str/str.h" #include "libc/str/tpenc.h" diff --git a/libc/runtime/grow.c b/libc/runtime/grow.c index 8d5f566b..8d7fec13 100644 --- a/libc/runtime/grow.c +++ b/libc/runtime/grow.c @@ -27,40 +27,20 @@ #include "libc/str/str.h" #include "libc/sysv/errfuns.h" +/* TODO(jart): DELETE */ + #define GUARANTEE_TERMINATOR 1 #define INITIAL_CAPACITY (32 - GUARANTEE_TERMINATOR) /** - * Grows array, The Cosmopolitan Way. - * - * This function may be called once an array has run out of space. If p - * is NULL, a new array is allocated; otherwise, the array's made 1.5x - * bigger. It has been written that this amortizes list appends down to - * constant-time. Extended memory is zeroed. Growth is monotonic. - * - * If p points to to static memory or something on the stack, it'll be - * converted to dynamic memory automatically. This can make algorithms - * faster when the average case is a small amount of data. It also means - * functions using this (and free_s()) won't have a hard-requirement on - * malloc(). - * - * Consider trying the higher-level append() and concat() APIs (defined - * in libc/alg/arraylist.h) as an alternative to directly using grow(). 
- * - * @param pp points to pointer holding memory address - * @param capacity tracks maximum items that can be stored in p - * can only be 0 if p is NULL (see reallocarray() for non-monotonic) - * @param itemsize is the sizeof each item - * @return true on success, or false w/ errno and *p is NOT free()'d - * @error ENOMEM if realloc() not linked or mmap() failed - * @note tiny programs might need to explicitly YOINK(realloc) - * @see test/libc/runtime/grow_test.c + * Grows array. + * @deprecated favor realloc */ bool __grow(void *pp, size_t *capacity, size_t itemsize, size_t extra) { void **p, *p1, *p2; - size_t n1, n2; /* item counts */ - size_t t1, t2; /* byte counts */ - extra += GUARANTEE_TERMINATOR; /* p ⊃ p[𝑖]==0 */ + size_t n1, n2; + size_t t1, t2; + extra += GUARANTEE_TERMINATOR; p = (void **)pp; assert(itemsize); assert((*p && *capacity) || (!*p && !*capacity)); diff --git a/libc/runtime/internal.h b/libc/runtime/internal.h index d70024cf..11f0323b 100644 --- a/libc/runtime/internal.h +++ b/libc/runtime/internal.h @@ -3,7 +3,7 @@ #ifndef __STRICT_ANSI__ #include "libc/dce.h" #include "libc/elf/struct/ehdr.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #include "libc/runtime/runtime.h" #define STACK_CEIL 0x700000000000ul diff --git a/libc/runtime/interruptiblecall.c b/libc/runtime/interruptiblecall.c index 619d63ab..10cfda5a 100644 --- a/libc/runtime/interruptiblecall.c +++ b/libc/runtime/interruptiblecall.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/mem/mem.h" #include "libc/runtime/interruptiblecall.h" #include "libc/runtime/runtime.h" diff --git a/libc/runtime/isheap.c b/libc/runtime/isheap.c index ede96732..86f7edf0 100644 --- a/libc/runtime/isheap.c +++ b/libc/runtime/isheap.c @@ -29,7 +29,8 @@ */ bool isheap(void 
*p) { int x, i; - register uintptr_t rsp asm("rsp"); + uintptr_t rsp; + asm("mov\t%%rsp,%0" : "=r"(rsp)); if (ROUNDDOWN(rsp, STACKSIZE) == ROUNDDOWN((intptr_t)p, STACKSIZE)) { return false; } else { diff --git a/libc/runtime/mapanon.c b/libc/runtime/mapanon.c index 4486f373..d4d834b6 100644 --- a/libc/runtime/mapanon.c +++ b/libc/runtime/mapanon.c @@ -22,6 +22,16 @@ #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" +/** + * Helper function for allocating anonymous mapping. + * + * This function is equivalent to: + * + * mmap(NULL, mapsize, PROT_READ | PROT_WRITE, + * MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + * + * Except it offers a small saving on code size. + */ void *mapanon(size_t mapsize) { return mmap(NULL, mapsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git a/libc/runtime/mapelfread.c b/libc/runtime/mapelfread.c index 27dff9e1..dc660718 100644 --- a/libc/runtime/mapelfread.c +++ b/libc/runtime/mapelfread.c @@ -19,10 +19,10 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/elf/def.h" #include "libc/elf/elf.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #include "libc/runtime/internal.h" -Elf64_Ehdr *MapElfRead(const char *filename, struct MappedFile *mf) { +hidden Elf64_Ehdr *MapElfRead(const char *filename, struct MappedFile *mf) { if (MapFileRead(filename, mf) != -1 && IsElf64Binary(mf->addr, mf->size)) { return mf->addr; } else { diff --git a/libc/runtime/mmap.c b/libc/runtime/mmap.c index 849cf4c8..f3f2be71 100644 --- a/libc/runtime/mmap.c +++ b/libc/runtime/mmap.c @@ -84,7 +84,7 @@ void *mmap(void *addr, size_t size, int prot, int flags, int fd, int64_t off) { } addr = (void *)(intptr_t)((int64_t)x << 16); } - dm = DirectMap(addr, size, prot, flags | MAP_FIXED, fd, off); + dm = __mmap(addr, size, prot, flags | MAP_FIXED, fd, off); if (dm.addr == MAP_FAILED || dm.addr != addr) { return MAP_FAILED; } diff --git 
a/libc/runtime/ntgetmodule.c b/libc/runtime/ntgetmodule.c deleted file mode 100644 index 6b2342c0..00000000 --- a/libc/runtime/ntgetmodule.c +++ /dev/null @@ -1,37 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nt/ntdll.h" -#include "libc/nt/struct/ldr.h" -#include "libc/nt/struct/ldrdatatableentry.h" -#include "libc/nt/struct/linkedlist.h" -#include "libc/nt/struct/teb.h" -#include "libc/str/str.h" - -textwindows const struct NtLdrDataTableEntry *NtGetModule( - const char *basename) { - struct NtLinkedList *head = &NtGetPeb()->Ldr->InLoadOrderModuleList; - struct NtLinkedList *ldr = head->Next; - do { - const struct NtLdrDataTableEntry *dll = - (const struct NtLdrDataTableEntry *)ldr; - if (strcasecmp8to16(basename, dll->BaseDllName.Data) == 0) return dll; - } while ((ldr = ldr->Next) && ldr != head); - return NULL; -} diff --git a/libc/runtime/piro.c b/libc/runtime/piro.c index 475fca1b..1e3248af 100644 --- a/libc/runtime/piro.c +++ b/libc/runtime/piro.c 
@@ -33,7 +33,7 @@ ╠──────────────────────────────────────────────────────▌▀▄─▐──▀▄─▐▄─▐▄▐▄─▐▄─▐▄─│ │ αcτµαlly pδrταblε εxεcµταblε § post-initialization read-only │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/runtime/internal.h" diff --git a/libc/runtime/ring.h b/libc/runtime/ring.h deleted file mode 100644 index 22a591b5..00000000 --- a/libc/runtime/ring.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_RUNTIME_RING_H_ -#define COSMOPOLITAN_LIBC_RUNTIME_RING_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -struct RingBuffer { - void *p; - char *_addr; - size_t _size; -}; - -void *ringalloc(struct RingBuffer *, size_t); -int ringfree(struct RingBuffer *); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_LIBC_RUNTIME_RING_H_ */ diff --git a/libc/runtime/ringalloc.c b/libc/runtime/ringalloc.c deleted file mode 100644 index 5e1423da..00000000 --- a/libc/runtime/ringalloc.c +++ /dev/null @@ -1,75 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" -#include "libc/calls/calls.h" -#include "libc/dce.h" -#include "libc/limits.h" -#include "libc/macros.h" -#include "libc/runtime/ring.h" -#include "libc/runtime/runtime.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/prot.h" - -/** - * Allocates ring buffer. - * - * Reads/writes wrap around on overflow. - * - * ┌────────────┐ - * │ 𝑓₀..𝑓ₙ₋₁ │ - * └┬┬──────────┘ - * │└────────────┐ - * ┌┴────────────┬┴────────────┐ - * │ 𝑣₀..𝑣ₙ₋₁ │ 𝑣ₙ..𝑣ₙ*₂₋₁ │ - * └─────────────┴─────────────┘ - * - * @param r is metadata object owned by caller, initialized to zero - * @param n is byte length - * @return r->p, or NULL w/ errno - * @see ringfree(), balloc() - */ -void *ringalloc(struct RingBuffer *r, size_t n) { - void *a2; - int fd, rc; - size_t grain; - assert(!r->p); - assert(n > 0); - assert(n <= (INT_MAX - FRAMESIZE + 1) / 2); - if ((fd = openanon("ring", 0)) != -1) { - grain = ROUNDUP(n, FRAMESIZE); - rc = ftruncate(fd, grain * 2); - assert(rc != -1); - r->_size = grain * 2; - r->_addr = mmap(NULL, grain, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (r->_addr != MAP_FAILED) { - a2 = mmap(r->_addr + grain, grain, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, fd, grain - n); - assert(a2 != MAP_FAILED); - r->p = r->_addr + grain - n; - if (IsWindows()) { - memset(r->p, 0, n); /* @see ftruncate() */ - } - } - } - rc = close(fd); - assert(rc != -1); - return r->p; -} diff --git a/libc/runtime/symbols.internal.h b/libc/runtime/symbols.internal.h index 9608336e..38de507d 100644 --- a/libc/runtime/symbols.internal.h +++ b/libc/runtime/symbols.internal.h @@ -1,7 +1,7 @@ #ifndef 
COSMOPOLITAN_LIBC_SYMBOLS_H_ #define COSMOPOLITAN_LIBC_SYMBOLS_H_ #include "libc/elf/elf.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ diff --git a/libc/runtime/unsetenv.c b/libc/runtime/unsetenv.c index 66e89d88..455e1c04 100644 --- a/libc/runtime/unsetenv.c +++ b/libc/runtime/unsetenv.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" #include "libc/sysv/errfuns.h" diff --git a/libc/sock/closesocket-nt.c b/libc/sock/closesocket-nt.c index 18d0cfea..5d557984 100644 --- a/libc/sock/closesocket-nt.c +++ b/libc/sock/closesocket-nt.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/nt/winsock.h" #include "libc/sock/internal.h" diff --git a/libc/sock/kntwsadata.c b/libc/sock/kntwsadata.c index a3782736..ee097976 100644 --- a/libc/sock/kntwsadata.c +++ b/libc/sock/kntwsadata.c @@ -31,7 +31,7 @@ * Berkeley Socket wrappers are linked. The latest version of Winsock * was introduced alongside x64, so this should never fail. 
*/ -struct NtWsaData kNtWsaData; +hidden struct NtWsaData kNtWsaData; textwindows static void winsockfini(void) { WSACleanup(); diff --git a/libc/sock/sendfile.c b/libc/sock/sendfile.c index 78ebdedf..37531be0 100644 --- a/libc/sock/sendfile.c +++ b/libc/sock/sendfile.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/dce.h" diff --git a/libc/stdio/fscanf.c b/libc/stdio/fscanf.c index 939e4d47..b4aecaf9 100644 --- a/libc/stdio/fscanf.c +++ b/libc/stdio/fscanf.c @@ -25,9 +25,9 @@ * * To read a line of data from a well-formed trustworthy file: * - * int x, y; - * char text[256]; - * fscanf(f, "%d %d %s\n", &x, &y, text); + * int x, y; + * char text[256]; + * fscanf(f, "%d %d %s\n", &x, &y, text); * * Please note that this function is brittle by default, which makes it * a good fit for yolo coding. 
With some toil it can be used in a way diff --git a/libc/stdio/g_stdbuf.c b/libc/stdio/g_stdbuf.c index 1d17829e..c932ebef 100644 --- a/libc/stdio/g_stdbuf.c +++ b/libc/stdio/g_stdbuf.c @@ -19,5 +19,5 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/stdio/internal.h" -_Alignas(PAGESIZE) unsigned char g_stdoutbuf[BUFSIZ]; -_Alignas(PAGESIZE) unsigned char g_stderrbuf[BUFSIZ]; +unsigned char g_stdoutbuf[BUFSIZ] hidden; +unsigned char g_stderrbuf[BUFSIZ] hidden; diff --git a/libc/stdio/g_stdio.c b/libc/stdio/g_stdio.c index e578aa97..c51cef02 100644 --- a/libc/stdio/g_stdio.c +++ b/libc/stdio/g_stdio.c @@ -19,4 +19,4 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/stdio/internal.h" -FILE g_stdio[3]; +hidden FILE g_stdio[3]; diff --git a/libc/stdio/printf.c b/libc/stdio/printf.c index 2cd3cfdc..4c9cb1fe 100644 --- a/libc/stdio/printf.c +++ b/libc/stdio/printf.c @@ -25,39 +25,39 @@ * Cosmopolitan supports most of the standard formatting behaviors * described by `man 3 printf`, in addition to the following: * - * - %jd, %jx, etc. are {,u}intmax_t which in Cosmopolitan is 128-bit. + * - `%jd`, `%jx`, etc. are {,u}intmax_t which in Cosmopolitan is 128-bit. * - * - %'d or %,d may be used to insert thousands separators. The prior is + * - `%'d` or `%,d` may be used to insert thousands separators. The prior is * consistent with C; the latter is consistent with Python. * - * - %m inserts strerror(errno) into the formatted output. This is + * - `%m` inserts strerror(errno) into the formatted output. This is * consistent with glibc, musl, and uclibc. * - * - %n inserts "\n" on non-Windows and "\r\n" on Windows. This is the + * - `%n` inserts "\n" on non-Windows and "\r\n" on Windows. This is the * same behavior as Java. It's incompatible with glibc's behavior, * since that's problematic according to Android's security team. 
* - * - %hs converts UTF-16/UCS-2 → UTF-8, which can be helpful on Windows. + * - `%hs` converts UTF-16/UCS-2 → UTF-8, which can be helpful on Windows. * Formatting (e.g. %-10hs) will use monospace display width rather * than string length or codepoint count. * - * - %ls (or %Ls) converts UTF-32 → UTF-8. Formatting (e.g. %-10ls) will - * use monospace display width rather than string length. + * - `%ls` (or `%Ls`) converts UTF-32 → UTF-8. Formatting (e.g. %-10ls) + * will use monospace display width rather than string length. * - * - The %#s and %#c alternate forms display values using the standard - * IBM standard 256-letter alphabet. Using %#.*s to specify length - * will allow true binary (i.e. with NULs) to be formatted. + * - The `%#s` and `%#c` alternate forms display values using the + * standard IBM standard 256-letter alphabet. Using `%#.*s` to specify + * length will allow true binary (i.e. with NULs) to be formatted. * - * - The %'s and %'c alternate forms are Cosmopolitan extensions for + * - The `%'s` and `%'c` alternate forms are Cosmopolitan extensions for * escaping string literals for C/C++ and Python. The outer quotation - * marks can be added automatically using %`s. If constexpr format + * marks can be added automatically using ``%`s``. If constexpr format * strings are used, we can avoid linking cescapec() too. * - * - The backtick modifier (%`s and %`c) and repr() directive (%r) both - * ask the formatting machine to represent values as real code rather - * than using arbitrary traditions for displaying values. This means - * it implies the quoting modifier, wraps the value with {,u,L}['"] - * quotes, displays NULL as "NULL" rather than "(null)", etc. + * - The backtick modifier (``%`s`` and ``%`c``) and repr() directive + * (`%r`) both ask the formatting machine to represent values as real + * code rather than using arbitrary traditions for displaying values. 
+ * This means it implies the quoting modifier, wraps the value with + * {,u,L}['"] quotes, displays NULL as "NULL" rather than "(null)". * * @see palandprintf() for intuitive reference documentation * @see {,v}{,s{,n},{,{,x}as},f,d}printf diff --git a/libc/str/ispunct.c b/libc/str/ispunct.c index 281eacc2..b60b3aa0 100644 --- a/libc/str/ispunct.c +++ b/libc/str/ispunct.c @@ -20,7 +20,7 @@ #include "libc/str/str.h" /** - * Returns nonzero if c ∈ !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ + * Returns nonzero if ``c ∈ !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~`` */ int ispunct(int c) { return (0x21 <= c && c <= 0x7E) && !('0' <= c && c <= '9') && diff --git a/libc/str/kx86processormodels.c b/libc/str/kx86processormodels.c index 12c402ff..2181c6eb 100644 --- a/libc/str/kx86processormodels.c +++ b/libc/str/kx86processormodels.c @@ -21,7 +21,7 @@ #include "libc/macros.h" #include "libc/nexgen32e/x86info.h" -const struct X86ProcessorModel kX86ProcessorModels[] = { +hidden const struct X86ProcessorModel kX86ProcessorModels[] = { /* */ {0x060F, X86_MARCH_CORE2, X86_GRADE_CLIENT}, {0x0616, X86_MARCH_CORE2, X86_GRADE_MOBILE}, @@ -80,4 +80,4 @@ const struct X86ProcessorModel kX86ProcessorModels[] = { /* */ }; -const size_t kX86ProcessorModelCount = ARRAYLEN(kX86ProcessorModels); +hidden const size_t kX86ProcessorModelCount = ARRAYLEN(kX86ProcessorModels); diff --git a/libc/str/sha256.c b/libc/str/sha256.c index d903b451..33290a13 100644 --- a/libc/str/sha256.c +++ b/libc/str/sha256.c @@ -11,7 +11,7 @@ * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf This implementation uses little endian byte order. 
*********************************************************************/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/dce.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/internal.h" diff --git a/libc/str/strlcat.c b/libc/str/strlcat.c index 6eef2671..4e8fd972 100644 --- a/libc/str/strlcat.c +++ b/libc/str/strlcat.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" /** diff --git a/libc/str/strlcpy.c b/libc/str/strlcpy.c index 31936077..0dd74a4f 100644 --- a/libc/str/strlcpy.c +++ b/libc/str/strlcpy.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/macros.h" #include "libc/str/str.h" diff --git a/libc/str/tpdecode.ncabi.c b/libc/str/tpdecode.ncabi.c index b3133a23..e3587628 100644 --- a/libc/str/tpdecode.ncabi.c +++ b/libc/str/tpdecode.ncabi.c @@ -33,6 +33,7 @@ forceinline int getbyte(void *arg, uint32_t i) { * @return number of bytes successfully consumed or -1 w/ errno * @note synchronization is performed * @see libc/str/tpdecodecb.internal.h (for implementation) + * @deprecated */ int(tpdecode)(const char *s, wint_t *out) { return tpdecodecb(out, (unsigned char)s[0], getbyte, (void *)s); diff --git a/libc/str/tpencode.ncabi.c b/libc/str/tpencode.ncabi.c index f3d31c5b..acba7cbb 100644 --- a/libc/str/tpencode.ncabi.c +++ b/libc/str/tpencode.ncabi.c @@ -36,6 +36,7 @@ * @param awesome mode enables numbers the IETF unilaterally banned * @return number of bytes written * @note this encoding was designed on a napkin in a new jersey diner + * 
@deprecated */ unsigned(tpencode)(char *p, size_t size, wint_t wc, bool32 awesome) { int i, j; diff --git a/libc/sysv/systemfive.S b/libc/sysv/systemfive.S index 27aeae14..d918ab67 100644 --- a/libc/sysv/systemfive.S +++ b/libc/sysv/systemfive.S @@ -61,6 +61,11 @@ │ cosmopolitan § bell system five » system call support ─╬─│┼ ╚────────────────────────────────────────────────────────────────────────────│*/ + .initbss 300,_init_systemfive +__hostos: + .quad 0 + .endobj __hostos,globl,hidden + / Performs System Five System Call. / / Cosmopolitan is designed to delegate all function calls into the @@ -83,10 +88,6 @@ / @return %rax:%rdx is result, or -1 w/ errno on error / @clob %rcx,%r10,%r11 / @see syscalls.sh - .initbss 300,_init_systemfive -__hostos: - .quad 0 - .endobj __hostos,globl,hidden __systemfive: .quad 0 .endobj __systemfive,globl,hidden diff --git a/libc/testlib/comborunner.c b/libc/testlib/comborunner.c index 4cbae97e..59f919e4 100644 --- a/libc/testlib/comborunner.c +++ b/libc/testlib/comborunner.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/fmt.h" #include "libc/mem/mem.h" #include "libc/str/str.h" diff --git a/libc/testlib/formatstr.c b/libc/testlib/formatstr.c index 9327f37e..d6e6a39f 100644 --- a/libc/testlib/formatstr.c +++ b/libc/testlib/formatstr.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" #include "libc/testlib/testlib.h" #include "libc/x/x.h" diff --git a/libc/testlib/showerror_.c b/libc/testlib/showerror_.c index 1b2c7e78..413a45a7 100644 --- a/libc/testlib/showerror_.c +++ 
b/libc/testlib/showerror_.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/errno.h" #include "libc/fmt/fmt.h" diff --git a/libc/testlib/testmain.c b/libc/testlib/testmain.c index d51ee5d0..ce4d73f3 100644 --- a/libc/testlib/testmain.c +++ b/libc/testlib/testmain.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/log/log.h" #include "libc/nexgen32e/x86feature.h" diff --git a/libc/testlib/testmem.c b/libc/testlib/testmem.c index a24729a4..e8f061c1 100644 --- a/libc/testlib/testmem.c +++ b/libc/testlib/testmem.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/fmt/bing.internal.h" diff --git a/libc/time/localtime.c b/libc/time/localtime.c index 1e02adec..1fa7587d 100644 --- a/libc/time/localtime.c +++ b/libc/time/localtime.c @@ -247,9 +247,9 @@ time_t altzone; #endif /* defined ALTZONE */ static int32_t -detzcode(codep) - const char * const codep; -{ +detzcode( + const char * const codep +) { register int32_t result; register int i; result = (codep[0] & 0x80) ? ~0L : 0; @@ -259,9 +259,9 @@ detzcode(codep) } static time_t -detzcode64(codep) - const char * const codep; -{ +detzcode64( + const char * const codep +) { register time_t result; register int i; result = (codep[0] & 0x80) ? 
(~(int_fast64_t) 0) : 0; @@ -336,10 +336,10 @@ settzname(void) } forceinline int -differ_by_repeat(t1, t0) - const time_t t1; - const time_t t0; -{ +differ_by_repeat( + const time_t t1, + const time_t t0 +) { if (TYPE_INTEGRAL(time_t) && TYPE_BIT(time_t) - TYPE_SIGNED(time_t) < SECSPERREPEAT_BITS) return 0; @@ -347,19 +347,21 @@ differ_by_repeat(t1, t0) } forceinline int -cmpstr(l, r) - const char *l, *r; -{ +cmpstr( + const char *l, + const char *r +) { size_t i = 0; while (l[i] == r[i] && r[i]) ++i; return (l[i] & 0xff) - (r[i] & 0xff); } static int -typesequiv(sp, a, b) - int a, b; - const struct state *sp; -{ +typesequiv( + const struct state *sp, + int a, + int b +) { int result; if (sp == NULL || a < 0 || a >= sp->typecnt || @@ -379,11 +381,11 @@ typesequiv(sp, a, b) } static int -tzload(name, sp, doextend) - register const char * name; - register struct state * const sp; - register const int doextend; -{ +tzload( + const char * name, + struct state * const sp, + const int doextend +) { register const char * p; register int i; register int fid; @@ -624,9 +626,9 @@ static const int kYearLengths[2] = { */ static const char * -getzname(strp) - const char * strp; -{ +getzname( + const char * strp +) { char c; while ((c = *strp) != '\0' && !isdigit(c) && c != ',' && c != '-' && c != '+') { @@ -645,10 +647,10 @@ getzname(strp) */ static const char * -getqzname(strp, delim) - register const char * strp; - const int delim; -{ +getqzname( + const char * strp, + const int delim +) { register int c; while ((c = *strp) != '\0' && c != delim) @@ -664,12 +666,12 @@ getqzname(strp, delim) */ static const char * -getnum(strp, nump, min, max) - register const char * strp; - int * const nump; - const int min; - const int max; -{ +getnum( + const char * strp, + int * const nump, + const int min, + const int max +) { register char c; register int num; @@ -697,10 +699,10 @@ getnum(strp, nump, min, max) */ static const char * -getsecs(strp, secsp) - register const char * strp; - 
int32_t * const secsp; -{ +getsecs( + const char * strp, + int32_t * const secsp +) { int num; /* ** `HOURSPERDAY * DAYSPERWEEK - 1' allows quasi-Posix rules like @@ -738,10 +740,10 @@ getsecs(strp, secsp) */ static const char * -getoffset(strp, offsetp) - register const char * strp; - int32_t * const offsetp; -{ +getoffset( + const char * strp, + int32_t * const offsetp +) { register int neg = 0; if (*strp == '-') { neg = 1; @@ -764,10 +766,10 @@ getoffset(strp, offsetp) */ static const char * -getrule(strp, rulep) - const char * strp; - register struct rule * const rulep; -{ +getrule( + const char * strp, + struct rule * const rulep +) { if (*strp == 'J') { /* ** Julian day. @@ -818,16 +820,16 @@ getrule(strp, rulep) */ static time_t -transtime(janfirst, year, rulep, offset) - const time_t janfirst; - const int year; - register const struct rule * const rulep; - const int32_t offset; -{ - register int leapyear; - register time_t value; - register int i; - int d, m1, yy0, yy1, yy2, dow; +transtime( + const time_t janfirst, + const int year, + const struct rule * const rulep, + const int32_t offset +) { + register int leapyear; + register time_t value; + register int i; + int d, m1, yy0, yy1, yy2, dow; INITIALIZE(value); leapyear = isleap(year); @@ -913,21 +915,21 @@ transtime(janfirst, year, rulep, offset) */ static int -tzparse(name, sp, lastditch) - const char * name; - register struct state * const sp; - const int lastditch; -{ - const char * stdname; - const char * dstname; - size_t stdlen; - size_t dstlen; - int32_t stdoffset; - int32_t dstoffset; - register time_t * atp; - register unsigned char * typep; - register char * cp; - register int load_result; +tzparse( + const char * name, + struct state * const sp, + const int lastditch +) { + const char * stdname; + const char * dstname; + size_t stdlen; + size_t dstlen; + int32_t stdoffset; + int32_t dstoffset; + register time_t * atp; + register unsigned char *typep; + register char * cp; + register int 
load_result; INITIALIZE(dstname); stdname = name; @@ -1148,9 +1150,9 @@ tzparse(name, sp, lastditch) } static void -gmtload(sp) - struct state * const sp; -{ +gmtload( + struct state * const sp +) { if (tzload(gmt, sp, TRUE) != 0) (void) tzparse(gmt, sp, TRUE); } @@ -1243,11 +1245,11 @@ tzset(void) /*ARGSUSED*/ static struct tm * -localsub(timep, offset, tmp) - const time_t * const timep; - const int32_t offset; - struct tm * const tmp; -{ +localsub( + const time_t * const timep, + const int32_t offset, + struct tm * const tmp +) { register struct state * sp; register const struct ttinfo * ttisp; register int i; @@ -1335,9 +1337,9 @@ localsub(timep, offset, tmp) } struct tm * -localtime(timep) - const time_t * const timep; -{ +localtime( + const time_t * const timep +) { tzset(); return localsub(timep, 0L, &tm); } @@ -1347,10 +1349,10 @@ localtime(timep) */ struct tm * -localtime_r(timep, tmp) - const time_t * const timep; - struct tm * tmp; -{ +localtime_r( + const time_t * const timep, + struct tm * tmp +) { tzset(); return localsub(timep, 0L, tmp); } @@ -1360,11 +1362,11 @@ localtime_r(timep, tmp) */ static struct tm * -gmtsub(timep, offset, tmp) - const time_t * const timep; - const int32_t offset; - struct tm * const tmp; -{ +gmtsub( + const time_t * const timep, + const int32_t offset, + struct tm * const tmp +) { register struct tm * result; if (!gmt_is_set) { gmt_is_set = TRUE; @@ -1398,9 +1400,9 @@ gmtsub(timep, offset, tmp) } struct tm * -gmtime(timep) - const time_t * const timep; -{ +gmtime( + const time_t * const timep +) { return gmtsub(timep, 0L, &tm); } @@ -1409,20 +1411,20 @@ gmtime(timep) */ struct tm * -gmtime_r(timep, tmp) - const time_t * const timep; - struct tm * tmp; -{ +gmtime_r( + const time_t * const timep, + struct tm * tmp +) { return gmtsub(timep, 0L, tmp); } #ifdef STD_INSPIRED struct tm * -offtime(timep, offset) - const time_t * const timep; - const int32_t offset; -{ +offtime( + const time_t * const timep, + const int32_t offset +) 
{ return gmtsub(timep, offset, &tm); } @@ -1434,20 +1436,20 @@ offtime(timep, offset) */ pureconst optimizespeed static int -leaps_thru_end_of(y) - register const int y; -{ +leaps_thru_end_of( + const int y +) { return (y >= 0) ? (y / 4 - y / 100 + y / 400) : -(leaps_thru_end_of(-(y + 1)) + 1); } static struct tm * -timesub(timep, offset, sp, tmp) - const time_t * const timep; - const int32_t offset; - const struct state * const sp; - struct tm * const tmp; -{ +timesub( + const time_t * const timep, + const int32_t offset, + const struct state * const sp, + struct tm * const tmp +) { const struct lsinfo * lp; time_t tdays; int idays; /* unsigned would be so 2003 */ @@ -1618,10 +1620,10 @@ timesub(timep, offset, sp, tmp) */ static inline int -increment_overflow(number, delta) - int * number; - int delta; -{ +increment_overflow( + int * number, + int delta +) { #ifdef __GNUC__ return __builtin_add_overflow(*number, delta, number); #else @@ -1633,11 +1635,11 @@ increment_overflow(number, delta) } static int -normalize_overflow(tensptr, unitsptr, base) - int * const tensptr; - int * const unitsptr; - const int base; -{ +normalize_overflow( + int * const tensptr, + int * const unitsptr, + const int base +) { register int tensdelta; tensdelta = (*unitsptr >= 0) ? 
(*unitsptr / base) : @@ -1647,10 +1649,10 @@ normalize_overflow(tensptr, unitsptr, base) } static int -tmcomp(atmp, btmp) - register const struct tm * const atmp; - register const struct tm * const btmp; -{ +tmcomp( + const struct tm * const atmp, + const struct tm * const btmp +) { register int result; if ((result = (atmp->tm_year - btmp->tm_year)) == 0 && (result = (atmp->tm_mon - btmp->tm_mon)) == 0 && @@ -1662,13 +1664,13 @@ tmcomp(atmp, btmp) } static time_t -time2sub(tmp, funcp, offset, okayp, do_norm_secs) - struct tm * const tmp; - struct tm * (* const funcp)(const time_t*, int32_t, struct tm*); - const int32_t offset; - int * const okayp; - const int do_norm_secs; -{ +time2sub( + struct tm * const tmp, + struct tm * (* const funcp)(const time_t*, int32_t, struct tm*), + const int32_t offset, + int * const okayp, + const int do_norm_secs +) { register const struct state * sp; register int dir; register int i, j; @@ -1849,12 +1851,12 @@ label: } static time_t -time2(tmp, funcp, offset, okayp) - struct tm * const tmp; - struct tm * (* const funcp)(const time_t*, int32_t, struct tm*); - const int32_t offset; - int * const okayp; -{ +time2( + struct tm * const tmp, + struct tm * (* const funcp)(const time_t*, int32_t, struct tm*), + const int32_t offset, + int * const okayp +) { time_t t; /* ** First try without normalization of seconds @@ -1866,11 +1868,11 @@ time2(tmp, funcp, offset, okayp) } static time_t -time1(tmp, funcp, offset) - struct tm * const tmp; - struct tm * (* const funcp)(const time_t *, int32_t, struct tm *); - const int32_t offset; -{ +time1( + struct tm * const tmp, + struct tm * (* const funcp)(const time_t *, int32_t, struct tm *), + const int32_t offset +) { register time_t t; register const struct state * sp; register int samei, otheri; @@ -1942,34 +1944,34 @@ time1(tmp, funcp, offset) } time_t -mktime(tmp) - struct tm * const tmp; -{ +mktime( + struct tm * const tmp +) { tzset(); return time1(tmp, localsub, 0L); } time_t -timelocal(tmp) 
- struct tm * const tmp; -{ +timelocal( + struct tm * const tmp +) { tmp->tm_isdst = -1; /* in case it wasn't initialized */ return mktime(tmp); } time_t -timegm(tmp) - struct tm * const tmp; -{ +timegm( + struct tm * const tmp +) { tmp->tm_isdst = 0; return time1(tmp, gmtsub, 0L); } time_t -timeoff(tmp, offset) - struct tm * const tmp; - const long offset; -{ +timeoff( + struct tm * const tmp, + const long offset +) { tmp->tm_isdst = 0; return time1(tmp, gmtsub, offset); } @@ -1983,9 +1985,9 @@ timeoff(tmp, offset) */ static long -leapcorr(timep) - time_t * timep; -{ +leapcorr( + time_t * timep +) { register struct state * sp; register struct lsinfo * lp; register int i; @@ -2001,17 +2003,17 @@ leapcorr(timep) } pureconst time_t -time2posix(t) - time_t t; -{ +time2posix( + time_t t +) { tzset(); return t - leapcorr(&t); } pureconst time_t -posix2time(t) - time_t t; -{ +posix2time( + time_t t +) { time_t x; time_t y; diff --git a/libc/time/time.h b/libc/time/time.h index 8e5203c5..a33985d3 100644 --- a/libc/time/time.h +++ b/libc/time/time.h @@ -8,11 +8,11 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -extern const char kWeekdayNameShort[7][4]; -extern const char kWeekdayName[7][10]; -extern const char kMonthNameShort[12][4]; -extern const char kMonthName[12][10]; -extern const unsigned short kMonthYearDay[2][12]; +hidden extern const char kWeekdayNameShort[7][4]; +hidden extern const char kWeekdayName[7][10]; +hidden extern const char kMonthNameShort[12][4]; +hidden extern const char kMonthName[12][10]; +hidden extern const unsigned short kMonthYearDay[2][12]; extern char *tzname[2]; extern long CLOCKS_PER_SEC; diff --git a/libc/tinymath/atanl.S b/libc/tinymath/atanl.S index 82a1b0e0..429b816a 100644 --- a/libc/tinymath/atanl.S +++ b/libc/tinymath/atanl.S @@ -22,12 +22,13 @@ / Returns arc tangent of 𝑥. / +/ 1 3 1 5 1 7 1 9 1 11 +/ atan(𝑥) = 𝑥 - - 𝑥 + - 𝑥 - - 𝑥 + - 𝑥 - -- 𝑥 ... 
+/ 3 5 7 9 11 +/ / @param 𝑥 is an 80-bit long double passed on stack in 16-bytes / @return result of computation on FPU stack in %st / @define atan(𝑥) = Σₙ₌₀₋∞ 2²ⁿ(𝑛!)²/(𝟸𝑛+𝟷)!(𝑥²ⁿ⁺¹/(𝑥²+𝟷)ⁿ⁺¹) -/ 1 3 1 5 1 7 1 9 1 11 -/ @define atan(𝑥) = 𝑥 - - 𝑥 + - 𝑥 - - 𝑥 + - 𝑥 - -- 𝑥 ... -/ 3 5 7 9 11 tinymath_atanl: push %rbp mov %rsp,%rbp diff --git a/libc/tinymath/copysign.S b/libc/tinymath/copysign.S index 8b43e0e1..1449b3ad 100644 --- a/libc/tinymath/copysign.S +++ b/libc/tinymath/copysign.S @@ -24,19 +24,19 @@ tinymath_copysign: .leafprologue .profilable movapd %xmm1,%xmm2 - andpd .L1(%rip),%xmm0 - andpd .L2(%rip),%xmm2 + andpd .Lnan(%rip),%xmm0 + andpd .Lneg0(%rip),%xmm2 orpd %xmm2,%xmm0 .leafepilogue .endfn tinymath_copysign,globl .alias tinymath_copysign,copysign .rodata.cst16 -.L1: .long 4294967295 - .long 2147483647 +.Lnan: .long 0xffffffff + .long 0x7fffffff .long 0 .long 0 -.L2: .long 0 +.Lneg0: .long 0 .long -2147483648 .long 0 .long 0 diff --git a/libc/tinymath/fabs.S b/libc/tinymath/fabs.S index 13f3ede9..2702768f 100644 --- a/libc/tinymath/fabs.S +++ b/libc/tinymath/fabs.S @@ -20,6 +20,10 @@ #include "libc/macros.h" .source __FILE__ +/ Returns absolute value of double. +/ +/ @param xmm0 has double in lower half +/ @return xmm0 has result in lower half fabs: .leafprologue .profilable mov $0x7fffffffffffffff,%rax diff --git a/libc/tinymath/fmax.S b/libc/tinymath/fmax.S deleted file mode 100644 index 98715999..00000000 --- a/libc/tinymath/fmax.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. 
│ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ - -tinymath_fmax: - .leafprologue - .profilable - maxsd %xmm1,%xmm0 - .leafepilogue - .endfn tinymath_fmax,globl - .alias tinymath_fmax,fmax diff --git a/libc/calls/fdkind.c b/libc/tinymath/fmax.c similarity index 80% rename from libc/calls/fdkind.c rename to libc/tinymath/fmax.c index 8433323b..065f5480 100644 --- a/libc/calls/fdkind.c +++ b/libc/tinymath/fmax.c @@ -17,12 +17,20 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/internal.h" +#include "libc/tinymath/tinymath.h" -enum FdKind fdkind(int fd) { - if (0 <= fd && fd <= g_fds.n) { - return g_fds.p[fd].kind; - } else { - return kFdEmpty; +/** + * Returns maximum of two doubles. + * + * If one argument is NAN then the other is returned. + * This function is designed to do the right thing with + * signed zeroes. + */ +double fmax(double x, double y) { + if (__builtin_isnan(x)) return y; + if (__builtin_isnan(y)) return x; + if (__builtin_signbit(x) != __builtin_signbit(y)) { + return __builtin_signbit(x) ? y : x; /* C99 Annex F.9.9.2 */ } + return x < y ? 
y : x; } diff --git a/libc/tinymath/fmaxf.S b/libc/tinymath/fmaxf.S deleted file mode 100644 index 30db9f64..00000000 --- a/libc/tinymath/fmaxf.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ - -tinymath_fmaxf: - .leafprologue - .profilable - maxss %xmm1,%xmm0 - .leafepilogue - .endfn tinymath_fmaxf,globl - .alias tinymath_fmaxf,fmaxf diff --git a/libc/tinymath/fminl.S b/libc/tinymath/fmaxf.c similarity index 74% rename from libc/tinymath/fminl.S rename to libc/tinymath/fmaxf.c index 6b5c761c..1132abda 100644 --- a/libc/tinymath/fminl.S +++ b/libc/tinymath/fmaxf.c @@ -1,5 +1,5 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ │ 
Copyright 2020 Justine Alexandra Roberts Tunney │ │ │ @@ -17,24 +17,20 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ +#include "libc/tinymath/tinymath.h" -/ Returns minimum of two long doubles. -/ -/ @param 𝑥 is long double passed on stack in 16-bytes -/ @param 𝑦 is also pushed on stack, in reverse order -/ @return result in %st0 -tinymath_fminl: - push %rbp - mov %rsp,%rbp - .profilable - fldt 32(%rbp) - fldt 16(%rbp) - fcomi %st(1),%st - fcmovnbe %st(1),%st - fstp %st(1) - pop %rbp - ret - .endfn tinymath_fminl,globl - .alias tinymath_fminl,fminl +/** + * Returns maximum of two floats. + * + * If one argument is NAN then the other is returned. + * This function is designed to do the right thing with + * signed zeroes. + */ +float fmaxf(float x, float y) { + if (__builtin_isnan(x)) return y; + if (__builtin_isnan(y)) return x; + if (__builtin_signbitf(x) != __builtin_signbitf(y)) { + return __builtin_signbitf(x) ? y : x; /* C99 Annex F.9.9.2 */ + } + return x < y ? y : x; +} diff --git a/libc/tinymath/fmaxl.S b/libc/tinymath/fmaxl.S deleted file mode 100644 index e920fdb1..00000000 --- a/libc/tinymath/fmaxl.S +++ /dev/null @@ -1,40 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ - -/ Returns maximum of two long doubles. -/ -/ @param 𝑥 is long double passed on stack in 16-bytes -/ @param 𝑦 is also pushed on stack, in reverse order -/ @return result in %st0 -tinymath_fmaxl: - push %rbp - mov %rsp,%rbp - .profilable - fldt 32(%rbp) - fldt 16(%rbp) - fcomi %st(1),%st - fcmovb %st(1),%st - fstp %st(1) - pop %rbp - ret - .endfn tinymath_fmaxl,globl - .alias tinymath_fmaxl,fmaxl diff --git a/libc/tinymath/fmaxl.c b/libc/tinymath/fmaxl.c new file mode 100644 index 00000000..91129e6b --- /dev/null +++ b/libc/tinymath/fmaxl.c @@ -0,0 +1,36 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. 
│ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/tinymath.h" + +/** + * Returns maximum of two long doubles. + * + * If one argument is NAN then the other is returned. + * This function is designed to do the right thing with + * signed zeroes. + */ +long double fmaxl(long double x, long double y) { + if (__builtin_isnan(x)) return y; + if (__builtin_isnan(y)) return x; + if (__builtin_signbitl(x) != __builtin_signbitl(y)) { + return __builtin_signbitl(x) ? y : x; /* C99 Annex F.9.9.2 */ + } + return x < y ? y : x; +} diff --git a/libc/tinymath/fmin.S b/libc/tinymath/fmin.S deleted file mode 100644 index c3a40573..00000000 --- a/libc/tinymath/fmin.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ - -tinymath_fmin: - .leafprologue - .profilable - minsd %xmm1,%xmm0 - .leafepilogue - .endfn tinymath_fmin,globl - .alias tinymath_fmin,fmin diff --git a/test/libc/runtime/ringalloc_test.c b/libc/tinymath/fmin.c similarity index 74% rename from test/libc/runtime/ringalloc_test.c rename to libc/tinymath/fmin.c index f5495aeb..68b8bcce 100644 --- a/test/libc/runtime/ringalloc_test.c +++ b/libc/tinymath/fmin.c @@ -17,32 +17,20 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/runtime/ring.h" -#include "libc/testlib/testlib.h" +#include "libc/tinymath/tinymath.h" -TEST(ringalloc, testMagic) { - char *p; - size_t n; - struct RingBuffer ring = {}; - n = FRAMESIZE * 2; - EXPECT_NE(NULL, ringalloc(&ring, n)); - if ((p = ring.p)) { - EXPECT_EQ(0, p[0]); - EXPECT_EQ(0, p[7]); - EXPECT_EQ(0, p[n + 0]); - EXPECT_EQ(0, p[n + 7]); - p[0] = 23; - p[7] = 123; - EXPECT_EQ(23, p[0]); - EXPECT_EQ(123, p[7]); - EXPECT_EQ(23, p[n + 0]); - EXPECT_EQ(123, p[n + 7]); +/** + * Returns minimum of two doubles. + * + * If one argument is NAN then the other is returned. + * This function is designed to do the right thing with + * signed zeroes. + */ +double fmin(double x, double y) { + if (__builtin_isnan(x)) return y; + if (__builtin_isnan(y)) return x; + if (__builtin_signbit(x) != __builtin_signbit(y)) { + return __builtin_signbit(x) ? 
x : y; /* C99 Annex F.9.9.2 */ } - EXPECT_NE(-1, ringfree(&ring)); -} - -TEST(ringalloc, testFrameSized) { - struct RingBuffer ring = {}; - EXPECT_NE(NULL, ringalloc(&ring, FRAMESIZE)); - EXPECT_NE(-1, ringfree(&ring)); + return x < y ? x : y; } diff --git a/libc/tinymath/fminf.S b/libc/tinymath/fminf.S deleted file mode 100644 index 729b0ba3..00000000 --- a/libc/tinymath/fminf.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ - -tinymath_fminf: - .leafprologue - .profilable - minss %xmm1,%xmm0 - .leafepilogue - .endfn tinymath_fminf,globl - .alias tinymath_fminf,fminf diff --git a/libc/runtime/ringfree.c b/libc/tinymath/fminf.c similarity index 81% rename from libc/runtime/ringfree.c rename to libc/tinymath/fminf.c index 8b968108..4f780238 100644 --- a/libc/runtime/ringfree.c +++ b/libc/tinymath/fminf.c @@ -17,19 +17,20 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/runtime/ring.h" -#include "libc/runtime/runtime.h" +#include "libc/tinymath/tinymath.h" /** - * Frees ring buffer. + * Returns minimum of two floats. * - * @return 0 on success, or -1 w/ errno + * If one argument is NAN then the other is returned. + * This function is designed to do the right thing with + * signed zeroes. */ -int ringfree(struct RingBuffer *r) { - if (r->p) { - r->p = NULL; - return munmap(r->_addr, r->_size); - } else { - return 0; +float fminf(float x, float y) { + if (__builtin_isnan(x)) return y; + if (__builtin_isnan(y)) return x; + if (__builtin_signbitf(x) != __builtin_signbitf(y)) { + return __builtin_signbitf(x) ? x : y; /* C99 Annex F.9.9.2 */ } + return x < y ? 
x : y; } diff --git a/libc/tinymath/fminl.c b/libc/tinymath/fminl.c new file mode 100644 index 00000000..71d6f0f0 --- /dev/null +++ b/libc/tinymath/fminl.c @@ -0,0 +1,36 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/tinymath.h" + +/** + * Returns minimum of two long doubles. + * + * If one argument is NAN then the other is returned. + * This function is designed to do the right thing with + * signed zeroes. + */ +long double fminl(long double x, long double y) { + if (__builtin_isnan(x)) return y; + if (__builtin_isnan(y)) return x; + if (__builtin_signbitl(x) != __builtin_signbitl(y)) { + return __builtin_signbitl(x) ? x : y; /* C99 Annex F.9.9.2 */ + } + return x < y ? 
x : y; +} diff --git a/libc/unicode/strnwidth16.c b/libc/unicode/strnwidth16.c index d0c6ed04..180cca09 100644 --- a/libc/unicode/strnwidth16.c +++ b/libc/unicode/strnwidth16.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/oldutf16.internal.h" #include "libc/str/str.h" #include "libc/unicode/unicode.h" diff --git a/libc/unicode/strwidth16.c b/libc/unicode/strwidth16.c index 2fd95a88..479321ac 100644 --- a/libc/unicode/strwidth16.c +++ b/libc/unicode/strwidth16.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/limits.h" #include "libc/unicode/unicode.h" diff --git a/libc/x/xjoinpaths.c b/libc/x/xjoinpaths.c index 92cb74cb..8a65d80b 100644 --- a/libc/x/xjoinpaths.c +++ b/libc/x/xjoinpaths.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" #include "libc/x/x.h" diff --git a/libc/x/xstrcat.c b/libc/x/xstrcat.c index 6d4ee5ab..028b526a 100644 --- a/libc/x/xstrcat.c +++ b/libc/x/xstrcat.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/mem/mem.h" #include "libc/str/str.h" #include "libc/x/x.h" diff --git a/libc/zipos/parseuri.c b/libc/zipos/parseuri.c 
index d5d06b18..c7bb5630 100644 --- a/libc/zipos/parseuri.c +++ b/libc/zipos/parseuri.c @@ -20,7 +20,7 @@ #include "libc/str/str.h" #include "libc/zipos/zipos.internal.h" -const char kZiposSchemePrefix[4] = "zip:"; +const char kZiposSchemePrefix[4] hidden = "zip:"; /** * Extracts information about ZIP URI if it is one. diff --git a/libc/zipos/read.c b/libc/zipos/read.c index 5f3135ec..0f594172 100644 --- a/libc/zipos/read.c +++ b/libc/zipos/read.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/calls/struct/iovec.h" #include "libc/str/str.h" diff --git a/libc/zipos/stat-impl.c b/libc/zipos/stat-impl.c index fab65612..5769ce6e 100644 --- a/libc/zipos/stat-impl.c +++ b/libc/zipos/stat-impl.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/str/str.h" diff --git a/test/libc/bits/unsignedsubtract_test.c b/test/libc/bits/unsignedsubtract_test.c index 4c425661..a44e2c7a 100644 --- a/test/libc/bits/unsignedsubtract_test.c +++ b/test/libc/bits/unsignedsubtract_test.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/testlib/testlib.h" TEST(unsignedsubtract, testMacro) { diff --git a/test/libc/calls/fallocate_test.c b/test/libc/calls/fallocate_test.c index 06c875f8..05cd71ea 100644 --- a/test/libc/calls/fallocate_test.c +++ 
b/test/libc/calls/fallocate_test.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/dce.h" diff --git a/test/libc/fmt/palandprintf_test.c b/test/libc/fmt/palandprintf_test.c index ba28931b..4c2391ff 100644 --- a/test/libc/fmt/palandprintf_test.c +++ b/test/libc/fmt/palandprintf_test.c @@ -26,7 +26,7 @@ └─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/progn.internal.h" #include "libc/bits/pushpop.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/errno.h" #include "libc/fmt/fmt.h" #include "libc/fmt/itoa.h" diff --git a/test/libc/fmt/sprintf_s_test.c b/test/libc/fmt/sprintf_s_test.c index 1bac4d84..d8f9000d 100644 --- a/test/libc/fmt/sprintf_s_test.c +++ b/test/libc/fmt/sprintf_s_test.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/fmt.h" #include "libc/testlib/testlib.h" diff --git a/test/libc/mem/malloc_test.c b/test/libc/mem/malloc_test.c index 77833f8a..05a5a06f 100644 --- a/test/libc/mem/malloc_test.c +++ b/test/libc/mem/malloc_test.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/macros.h" diff --git a/test/libc/nexgen32e/lz4decode_test.c b/test/libc/nexgen32e/lz4decode_test.c index a8f81acf..e96d947d 100644 
--- a/test/libc/nexgen32e/lz4decode_test.c +++ b/test/libc/nexgen32e/lz4decode_test.c @@ -17,12 +17,12 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/log/check.h" #include "libc/nexgen32e/kompressor.h" #include "libc/nexgen32e/lz4.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/testlib/testlib.h" diff --git a/test/libc/nexgen32e/memmove_test.c b/test/libc/nexgen32e/memmove_test.c index dfbabe7c..33ebbeed 100644 --- a/test/libc/nexgen32e/memmove_test.c +++ b/test/libc/nexgen32e/memmove_test.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" #include "libc/testlib/testlib.h" diff --git a/test/libc/sock/inet_pton_test.c b/test/libc/sock/inet_pton_test.c index 3c442240..851b4963 100644 --- a/test/libc/sock/inet_pton_test.c +++ b/test/libc/sock/inet_pton_test.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/sock/sock.h" #include "libc/sysv/consts/af.h" #include "libc/sysv/consts/inaddr.h" diff --git a/test/libc/str/sigset_test.c b/test/libc/str/sigset_test.c index ea4dc8bb..435047d9 100644 --- a/test/libc/str/sigset_test.c +++ b/test/libc/str/sigset_test.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include 
"libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/sigbits.h" #include "libc/str/str.h" diff --git a/test/libc/str/tpdecode_test.c b/test/libc/str/tpdecode_test.c index 11abbc46..750e2c85 100644 --- a/test/libc/str/tpdecode_test.c +++ b/test/libc/str/tpdecode_test.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/errno.h" #include "libc/fmt/bing.internal.h" #include "libc/limits.h" diff --git a/test/libc/str/tpencode_test.c b/test/libc/str/tpencode_test.c index 41f6ff74..338d1fc9 100644 --- a/test/libc/str/tpencode_test.c +++ b/test/libc/str/tpencode_test.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/progn.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/str/str.h" #include "libc/str/tpencode.internal.h" #include "libc/testlib/testlib.h" diff --git a/test/tool/build/lib/asmdown_test.c b/test/tool/build/lib/asmdown_test.c new file mode 100644 index 00000000..c1bbdd12 --- /dev/null +++ b/test/tool/build/lib/asmdown_test.c @@ -0,0 +1,135 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. 
│ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/str.h" +#include "libc/testlib/testlib.h" +#include "tool/build/lib/asmdown.h" + +TEST(ParseAsmdown, test) { + struct Asmdown *ad; + const char *s = "\ +#include \"libc/macros.h\"\n\ +.source __FILE__\n\ +\n\ +/ Returns absolute value of double.\n\ +/\n\ +/ @param xmm0 has double in lower half\n\ +/ @return xmm0 has result in lower half\n\ +fabs: .leafprologue\n\ + .profilable\n\ + mov $0x7fffffffffffffff,%rax\n\ + movq %xmm0,%rdx\n\ + and %rax,%rdx\n\ + movq %rdx,%xmm0\n\ + .leafepilogue\n\ + .endfn fabs,globl\n\ +\n\ +/ Returns arc cosine of 𝑥.\n\ +/\n\ +/ This is a description.\n\ +/\n\ +/ @param 𝑥 is double scalar in low half of %xmm0\n\ +/ @return double scalar in low half of %xmm0\n\ +tinymath_acos:\n\ + ezlea tinymath_acosl,ax\n\ + jmp _d2ld2\n\ + .endfn tinymath_acos,globl\n\ +"; + ad = ParseAsmdown(s, strlen(s)); + ASSERT_EQ(2, ad->symbols.n); + + EXPECT_EQ(4, ad->symbols.p[0].line); + EXPECT_STREQ("fabs", ad->symbols.p[0].name); + EXPECT_FALSE(ad->symbols.p[0].javadown->isfileoverview); + EXPECT_STREQ("Returns absolute value of double.", + ad->symbols.p[0].javadown->title); + EXPECT_STREQ("", ad->symbols.p[0].javadown->text); + EXPECT_EQ(2, ad->symbols.p[0].javadown->tags.n); + EXPECT_STREQ("param", ad->symbols.p[0].javadown->tags.p[0].tag); + EXPECT_STREQ("xmm0 has double in lower half", + ad->symbols.p[0].javadown->tags.p[0].text); + EXPECT_STREQ("return", 
ad->symbols.p[0].javadown->tags.p[1].tag); + EXPECT_STREQ("xmm0 has result in lower half", + ad->symbols.p[0].javadown->tags.p[1].text); + + EXPECT_EQ(17, ad->symbols.p[1].line); + EXPECT_STREQ("tinymath_acos", ad->symbols.p[1].name); + EXPECT_FALSE(ad->symbols.p[1].javadown->isfileoverview); + EXPECT_STREQ("Returns arc cosine of 𝑥.", ad->symbols.p[1].javadown->title); + EXPECT_STREQ("This is a description.\n", ad->symbols.p[1].javadown->text); + EXPECT_EQ(2, ad->symbols.p[1].javadown->tags.n); + EXPECT_STREQ("param", ad->symbols.p[1].javadown->tags.p[0].tag); + EXPECT_STREQ("𝑥 is double scalar in low half of %xmm0", + ad->symbols.p[1].javadown->tags.p[0].text); + EXPECT_STREQ("return", ad->symbols.p[1].javadown->tags.p[1].tag); + EXPECT_STREQ("double scalar in low half of %xmm0", + ad->symbols.p[1].javadown->tags.p[1].text); + + FreeAsmdown(ad); +} + +TEST(ParseAsmdown, testAlias) { + struct Asmdown *ad; + const char *s = "\ +#include \"libc/macros.h\"\n\ +.source __FILE__\n\ +\n\ +/ Returns arc cosine of 𝑥.\n\ +/\n\ +/ This is a description.\n\ +/\n\ +/ @param 𝑥 is double scalar in low half of %xmm0\n\ +/ @return double scalar in low half of %xmm0\n\ +tinymath_acos:\n\ + ezlea tinymath_acosl,ax\n\ + jmp _d2ld2\n\ + .endfn tinymath_acos,globl\n\ + .alias tinymath_acos,acos\n\ +"; + ad = ParseAsmdown(s, strlen(s)); + ASSERT_EQ(2, ad->symbols.n); + + EXPECT_EQ(4, ad->symbols.p[0].line); + EXPECT_STREQ("tinymath_acos", ad->symbols.p[0].name); + EXPECT_FALSE(ad->symbols.p[0].javadown->isfileoverview); + EXPECT_STREQ("Returns arc cosine of 𝑥.", ad->symbols.p[0].javadown->title); + EXPECT_STREQ("This is a description.\n", ad->symbols.p[0].javadown->text); + EXPECT_EQ(2, ad->symbols.p[0].javadown->tags.n); + EXPECT_STREQ("param", ad->symbols.p[0].javadown->tags.p[0].tag); + EXPECT_STREQ("𝑥 is double scalar in low half of %xmm0", + ad->symbols.p[0].javadown->tags.p[0].text); + EXPECT_STREQ("return", ad->symbols.p[0].javadown->tags.p[1].tag); + EXPECT_STREQ("double scalar 
in low half of %xmm0", + ad->symbols.p[0].javadown->tags.p[1].text); + + EXPECT_EQ(4, ad->symbols.p[1].line); + EXPECT_STREQ("acos", ad->symbols.p[1].name); + EXPECT_FALSE(ad->symbols.p[1].javadown->isfileoverview); + EXPECT_STREQ("Returns arc cosine of 𝑥.", ad->symbols.p[1].javadown->title); + EXPECT_STREQ("This is a description.\n", ad->symbols.p[1].javadown->text); + EXPECT_EQ(2, ad->symbols.p[1].javadown->tags.n); + EXPECT_STREQ("param", ad->symbols.p[1].javadown->tags.p[0].tag); + EXPECT_STREQ("𝑥 is double scalar in low half of %xmm0", + ad->symbols.p[1].javadown->tags.p[0].text); + EXPECT_STREQ("return", ad->symbols.p[1].javadown->tags.p[1].tag); + EXPECT_STREQ("double scalar in low half of %xmm0", + ad->symbols.p[1].javadown->tags.p[1].text); + + FreeAsmdown(ad); +} diff --git a/test/tool/build/lib/javadown_test.c b/test/tool/build/lib/javadown_test.c index db531d5d..87e83fd7 100644 --- a/test/tool/build/lib/javadown_test.c +++ b/test/tool/build/lib/javadown_test.c @@ -228,3 +228,53 @@ laborum\n"; EXPECT_STREQ("asyncsignalsafe", jd->tags.p[3].tag); FreeJavadown(jd); } + +TEST(ParseJavadown, testAsmSyntax) { + const char *comment = "\ +/\tParses javadown \n\ +/\tand that is the bottom line.\n\ +/\n\ +/\tLorem ipsum dolor sit amet, consectetur adipiscing elit, sed do\n\ +/\t\teiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim \n\ +/\t\tad minim veniam, quis nostrud exercitation ullamco laboris nisi ut\n\ +/\t\taliquip ex ea commodo consequat.\n\ +/\n\ +/\tDuis aute irure dolor in reprehenderit in voluptate velit esse cillum\n\ +/\tdolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat\n\ +/\tnon proident, sunt in culpa qui officia deserunt mollit anim id est\n\ +/\tlaborum\n\ +/\n\ +/\t@param data should point to text inside the slash star markers\n\ +/\t\tlorem ipsum dolla dollaz yo\n\ +/\t@param size is length of data in bytes\n\ +/\t@return object that should be passed to FreeJavadown()\n\ +/\t@asyncsignalsafe\n"; + const char *title = "Parses javadown and that is the bottom line."; + const char *description = "\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do\n\ +\teiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim\n\ +\tad minim veniam, quis nostrud exercitation ullamco laboris nisi ut\n\ +\taliquip ex ea commodo consequat.\n\ +\n\ +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum\n\ +dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat\n\ +non proident, sunt in culpa qui officia deserunt mollit anim id est\n\ +laborum\n"; + struct Javadown *jd; + jd = ParseJavadown(comment, strlen(comment)); + EXPECT_FALSE(jd->isfileoverview); + EXPECT_STREQ(title, jd->title); + EXPECT_STREQ(description, jd->text); + EXPECT_EQ(4, jd->tags.n); + EXPECT_STREQ("param", jd->tags.p[0].tag); + EXPECT_STREQ("data should point to text inside the slash star markers\n" + "lorem ipsum dolla dollaz yo", + jd->tags.p[0].text); + EXPECT_STREQ("param", jd->tags.p[1].tag); + EXPECT_STREQ("size is length of data in bytes", jd->tags.p[1].text); + EXPECT_STREQ("return", jd->tags.p[2].tag); + EXPECT_STREQ("object that should be passed to FreeJavadown()", + jd->tags.p[2].text); + EXPECT_STREQ("asyncsignalsafe", jd->tags.p[3].tag); + FreeJavadown(jd); +} diff --git a/test/tool/viz/lib/test.mk b/test/tool/viz/lib/test.mk index 25244705..f25738e3 100644 --- a/test/tool/viz/lib/test.mk +++ b/test/tool/viz/lib/test.mk @@ -39,8 +39,7 @@ TEST_TOOL_VIZ_LIB_DIRECTDEPS = \ LIBC_TINYMATH \ LIBC_TESTLIB \ LIBC_X \ - TOOL_VIZ_LIB \ - THIRD_PARTY_AVIR + TOOL_VIZ_LIB TEST_TOOL_VIZ_LIB_DEPS 
:= \ $(call uniq,$(foreach x,$(TEST_TOOL_VIZ_LIB_DIRECTDEPS),$($(x)))) diff --git a/third_party/avir/LICENSE b/third_party/avir/LICENSE deleted file mode 100644 index 19aecd7c..00000000 --- a/third_party/avir/LICENSE +++ /dev/null @@ -1,26 +0,0 @@ -AVIR License Agreement - -The MIT License (MIT) - -AVIR Copyright (c) 2015-2019 Aleksey Vaneev - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -Please credit the author of this library in your documentation in the -following way: "AVIR image resizing algorithm designed by Aleksey Vaneev" diff --git a/third_party/avir/README.cosmo b/third_party/avir/README.cosmo deleted file mode 100644 index 345cac31..00000000 --- a/third_party/avir/README.cosmo +++ /dev/null @@ -1,5 +0,0 @@ -commit 7dd9515ef6aed6fb6d565ee12754703bdc46b3b0 -Author: Aleksey Vaneev -Date: Mon Jul 29 07:43:23 2019 +0300 - - Version 2.4 release. 
diff --git a/third_party/avir/README.md b/third_party/avir/README.md deleted file mode 100644 index bab3b1b5..00000000 --- a/third_party/avir/README.md +++ /dev/null @@ -1,367 +0,0 @@ -# AVIR # -## Introduction ## -Keywords: image resize, image resizer, image resizing, image scaling, -image scaler, image resize c++, image resizer c++ - -Please consider supporting the author on [Patreon](https://www.patreon.com/aleksey_vaneev). - -Me, Aleksey Vaneev, is happy to offer you an open source image resizing / -scaling library which has reached a production level of quality, and is -ready to be incorporated into any project. This library features routines -for both down- and upsizing of 8- and 16-bit, 1 to 4-channel images. Image -resizing routines were implemented in multi-platform C++ code, and have a -high level of optimality. Beside resizing, this library offers a sub-pixel -shift operation. Built-in sRGB gamma correction is available. - -The resizing algorithm at first produces 2X upsized image (relative to the -source image size, or relative to the destination image size if downsizing is -performed) and then performs interpolation using a bank of sinc function-based -fractional delay filters. At the last stage a correction filter is applied -which fixes smoothing introduced at previous steps. - -The resizing algorithm was designed to provide the best visual quality. The -author even believes this algorithm provides the "ultimate" level of -quality (for an orthogonal resizing) which cannot be increased further: no -math exists to provide a better frequency response, better anti-aliasing -quality and at the same time having less ringing artifacts: these are 3 -elements that define any resizing algorithm's quality; in AVIR practice these -elements have a high correlation to each other, so they can be represented by -a single parameter (AVIR offers several parameter sets with varying quality). 
-Algorithm's time performance turned out to be very good as well (for the -"ultimate" image quality). - -An important element utilized by this algorithm is the so called Peaked Cosine -window function, which is applied over sinc function in all filters. Please -consult the documentation for more details. - -Note that since AVIR implements orthogonal resizing, it may exhibit diagonal -aliasing artifacts. These artifacts are usually suppressed by EWA or radial -filtering techniques. EWA-like technique is not implemented in AVIR, because -it requires considerably more computing resources and may produce a blurred -image. - -As a bonus, a faster `LANCIR` image resizing algorithm is also offered as a -part of this library. But the main focus of this documentation is the original -AVIR image resizing algorithm. - -AVIR does not offer affine and non-linear image transformations "out of the -box". Since upsizing is a relatively fast operation in AVIR (required time -scales linearly with the output image area), affine and non-linear -transformations can be implemented in steps: 4- to 8-times upsizing, -transformation via bilinear interpolation, downsizing (linear proportional -affine transformations can probably skip the downsizing step). This should not -compromise the transformation quality much as bilinear interpolation's -problems will mostly reside in spectral area without useful signal, with a -maximum of 0.7 dB high-frequency attenuation for 4-times upsizing, and 0.17 dB -attenuation for 8-times upsizing. This approach is probably as time efficient -as performing a high-quality transform over the input image directly (the only -serious drawback is the increased memory requirement). Note that affine -transformations that change image proportions should first apply proportion -change during upsizing. - -*AVIR is devoted to women. 
Your digital photos can look good at any size!* - -## Requirements ## -C++ compiler and system with efficient "float" floating point (24-bit -mantissa) type support. This library can also internally use the "double" and -SIMD floating point types during resizing if needed. This library does not -have dependencies beside the standard C library. - -## Links ## -* [Documentation](https://www.voxengo.com/public/avir/Documentation/) - -## Usage Information ## -The image resizer is represented by the `avir::CImageResizer<>` class, which -is a single front-end class for the whole library. Basically, you do not need -to use nor understand any other classes beside this class. - -The code of the library resides in the "avir" C++ namespace, effectively -isolating it from all other code. The code is thread-safe. You need just -a single resizer object per running application, at any time, even when -resizing images concurrently. - -To resize images in your application, simply add 3 lines of code: - - #include "avir.h" - avir :: CImageResizer<> ImageResizer( 8 ); - ImageResizer.resizeImage( InBuf, 640, 480, 0, OutBuf, 1024, 768, 3, 0 ); - (multi-threaded operation requires additional coding, see the documentation) - -For low-ringing performance: - - avir :: CImageResizer<> ImageResizer( 8, 0, avir :: CImageResizerParamsLR() ); - -To use the built-in gamma correction, an object of the -`avir::CImageResizerVars` class with its variable `UseSRGBGamma` set to "true" -should be supplied to the `resizeImage()` function. Note that the gamma -correction is applied to all channels (e.g. alpha-channel) in the current -implementation. 
- - avir :: CImageResizerVars Vars; - Vars.UseSRGBGamma = true; - -Dithering (error-diffusion dither which is perceptually good) can be enabled -this way: - - typedef avir :: fpclass_def< float, float, - avir :: CImageResizerDithererErrdINL< float > > fpclass_dith; - avir :: CImageResizer< fpclass_dith > ImageResizer( 8 ); - -The library is able to process images of any bit depth: this includes 8-bit, -16-bit, float and double types. Larger integer and signed integer types are -not supported. Supported source and destination image sizes are only limited -by the available system memory. - -The code of this library was commented in the [Doxygen](http://www.doxygen.org/) -style. To generate the documentation locally you may run the -`doxygen ./other/avirdoxy.txt` command from the library's directory. Note that -the code was suitably documented allowing you to make modifications, and to -gain full understanding of the algorithm. - -Preliminary tests show that this library (compiled with Intel C++ Compiler -18.2 with AVX2 instructions enabled, without explicit SIMD resizing code) can -resize 8-bit RGB 5184x3456 (17.9 Mpixel) 3-channel image down to 1920x1280 -(2.5 Mpixel) image in 245 milliseconds, utilizing a single thread, on Intel -Core i7-7700K processor-based system without overclocking. This scales down to -74 milliseconds if 8 threads are utilized. - -Multi-threaded operation is not provided by this library "out of the box". -The multi-threaded (horizontally-threaded) infrastructure is available, but -requires additional system-specific interfacing code for engagement. - -## SIMD Usage Information ## -This library is capable of using SIMD floating point types for internal -variables. This means that up to 4 color channels can be processed in -parallel. Since the default interleaved processing algorithm itself remains -non-SIMD, the use of SIMD internal types is not practical for 1- and 2-channel -image resizing (due to overhead). 
SIMD internal type can be used this way: - - #include "avir_float4_sse.h" - avir :: CImageResizer< avir :: fpclass_float4 > ImageResizer( 8 ); - -For 1-channel and 2-channel image resizing when AVX instructions are allowed -it may be reasonable to utilize de-interleaved SIMD processing algorithm. -While it gives no performance benefit if the "float4" SSE processing type is -used, it offers some performance boost if the "float8" AVX processing type is -used (given dithering is not performed, or otherwise performance is reduced at -the dithering stage since recursive dithering cannot be parallelized). The -internal type remains non-SIMD "float". De-interleaved algorithm can be used -this way: - - #include "avir_float8_avx.h" - avir :: CImageResizer< avir :: fpclass_float8_dil > ImageResizer( 8 ); - -It's important to note that on the latest Intel processors (i7-7700K and -probably later) the use of the aforementioned SIMD-specific resizing code may -not be justifiable, or may be even counter-productive due to many factors: -memory bandwidth bottleneck, increased efficiency of processor's circuitry -utilization and out-of-order execution, automatic SIMD optimizations performed -by the compiler. This is at least true when compiling 64-bit code with Intel -C++ Compiler 18.2 with /QxSSE4.2, or especially with the /QxCORE-AVX2 option. -SSE-specific resizing code may still be a little bit more efficient for -4-channel image resizing. - -## Notes ## -This library was tested for compatibility with [GNU C++](http://gcc.gnu.org/), -[Microsoft Visual C++](http://www.microsoft.com/visualstudio/eng/products/visual-studio-express-products) -and [Intel C++](http://software.intel.com/en-us/c-compilers) compilers, on 32- -and 64-bit Windows, macOS and CentOS Linux. The code was also tested with -Dr.Memory/Win32 for the absence of uninitialized or unaddressable memory -accesses. - -All code is fully "inline", without the need to compile any source files. 
The -memory footprint of the library itself is very modest, except that the size of -the temporary image buffers depends on the input and output image sizes, and -is proportionally large. - -The "heart" of resizing algorithm's quality resides in the parameters defined -via the `avir::CImageResizerParams` structure. While the default set of -parameters that offers a good quality was already provided, there is -(probably) still a place for improvement exists, and the default parameters -may change in a future update. If you need to recall an exact set of -parameters, simply save them locally for a later use. - -When the algorithm is run with no resizing applied (k=1), the result of -resizing will not be an exact, but a very close copy of the source image. The -reason for such inexactness is that the image is always low-pass filtered at -first to reduce aliasing during subsequent resizing, and at last filtered by a -correction filter. Such approach allows algorithm to maintain a stable level -of quality regardless of the resizing "k" factor used. - -This library includes a binary command line tool "imageresize" for major -desktop platforms. This tool was designed to be used as a demonstration of -library's performance, and as a reference, it is multi-threaded (the `-t` -switch can be used to control the number of threads utilized). This tool uses -plain "float" processing (no explicit SIMD) and relies on automatic compiler -optimization (with Win64 binary being the "main" binary as it was compiled -with the best ICC optimization options for the time being). This tool uses the -following libraries: -* turbojpeg Copyright (c) 2009-2013 D. R. Commander -* libpng Copyright (c) 1998-2013 Glenn Randers-Pehrson -* zlib Copyright (c) 1995-2013 Jean-loup Gailly and Mark Adler - -Note that you can enable gamma-correction with the `-g` switch. 
However, -sometimes gamma-correction produces "greenish/reddish/bluish haze" since -low-amplitude oscillations produced by resizing at object boundaries are -amplified by gamma correction. This can also have an effect of reduced -contrast. - -## Interpolation Discussion ## -The use of certain low-pass filters and 2X upsampling in this library is -hardly debatable, because they are needed to attain a certain anti-aliasing -effect and keep ringing artifacts low. But the use of sinc function-based -interpolation filter that is 18 taps-long (may be higher, up to 36 taps in -practice) can be questioned, because even in 0th order case such -interpolation filter requires 18 multiply-add operations. Comparatively, an -optimal Hermite or cubic interpolation spline requires 8 multiply and 11 add -operations. - -One of the reasons 18-tap filter is preferred, is because due to memory -bandwidth limitations using a lower-order filter does not provide any -significant performance increase (e.g. 14-tap filter is less than 5% more -efficient overall). At the same time, in comparison to cubic spline, 18-tap -filter embeds a low-pass filter that rejects signal above 0.5\*pi (provides -additional anti-aliasing filtering), and this filter has a consistent shape at -all fractional offsets. Splines have a varying low-pass filter shape at -different fractional offsets (e.g. no low-pass filtering at 0.0 offset, -and maximal low-pass filtering at 0.5 offset). 18-tap filter also offers a -superior stop-band attenuation which almost guarantees absence of artifacts if -the image is considerably sharpened afterwards. - -## Why 2X upsizing in AVIR? ## -Classic approaches to image resizing do not perform an additional 2X upsizing. -So, why such upsizing is needed at all in AVIR? Indeed, image resizing can be -implemented using a single interpolation filter which is applied to the source -image directly. 
However, such approach has limitations: - -First of all, speaking about non-2X-upsized resizing, during upsizing the -interpolation filter has to be tuned to a frequency close to pi (Nyquist) in -order to reduce high-frequency smoothing: this reduces the space left for -filter optimization. Beside that, during downsizing, a filter that performs -well and predictable when tuned to frequencies close to the Nyquist frequency, -may become distorted in its spectral shape when it is tuned to lower -frequencies. That is why it is usually a good idea to have filter's stop-band -begin below Nyquist so that the transition band's shape remains stable at any -lower-frequency setting. At the same time, this requirement complicates a -further corrective filtering, because correction filter may become too steep -at the point where the stop-band begins. - -Secondly, speaking about non-2X-upsized resizing, filter has to be very short -(with a base length of 5-7 taps, further multiplied by the resizing factor) or -otherwise the ringing artifacts will be very strong: it is a general rule that -the steeper the filter is around signal frequencies being removed the higher -the ringing artifacts are. That is why it is preferred to move steep -transitions into the spectral area with a quieter signal. A short filter also -means it cannot provide a strong "beyond-Nyquist" stop-band attenuation, so an -interpolated image will look a bit edgy or not very clean due to stop-band -artifacts. - -To sum up, only additional controlled 2X upsizing provides enough spectral -space to design interpolation filter without visible ringing artifacts yet -providing a strong stop-band attenuation and stable spectral characteristics -(good at any resizing "k" factor). 
Moreover, 2X upsizing becomes very -important in maintaining a good resizing quality when downsizing and upsizing -by small "k" factors, in the range 0.5 to 2: resizing approaches that do not -perform 2X upsizing usually cannot design a good interpolation filter for such -factors just because there is not enough spectral space available. - -## Why Peaked Cosine in AVIR? ## -First of all, AVIR is a general solution to image resizing problem. That is -why it should not be directly compared to "spline interpolation" or "Lanczos -resampling", because the latter two are only means to design interpolation -filters, and they can be implemented in a variety of ways, even in sub-optimal -ways. Secondly, with only a minimal effort AVIR can be changed to use any -existing interpolation formula and any window function, but this is just not -needed. - -An effort was made to compare Peaked Cosine to Lanczos window function, and -here is the author's opinion. Peaked Cosine has two degrees of freedom whereas -Lanczos has one degree of freedom. While both functions can be used with -acceptable results, Peaked Cosine window function used in automatic parameter -optimization really pushes the limits of frequency response linearity, -anti-aliasing strength (stop-band attenuation) and low-ringing performance -which Lanczos cannot usually achieve. This is true at least when using a -general-purpose downhill simplex optimization method. Lanczos window has good -(but not better) characteristics in several special cases (certain "k" -factors) which makes it of limited use in a general solution such as AVIR. - -Among other window functions (Kaiser, Gaussian, Cauchy, Poisson, generalized -cosine windows) there are no better candidates as well. 
It looks like Peaked -Cosine function's scalability (it retains stable, almost continously-variable -spectral characteristics at any window parameter values), and its ability to -create "desirable" pass-band ripple in the frequency response near the cutoff -point contribute to its better overall quality. Somehow Peaked Cosine window -function optimization manages to converge to reasonable states in most cases -(that is why AVIR library comes with a set of equally robust, but distinctive -parameter sets) whereas all other window functions tend to produce -unpredictable optimization results. - -The only disadvantage of Peaked Cosine window function is that usable filters -windowed by this function tend to be longer than "usual" (with Kaiser window -being the "golden standard" for filter length per decibel of stop-band -attenuation). This is a price that should be paid for stable spectral -characteristics. - -## LANCIR ## - -As a part of AVIR library, the `CLancIR` class is also offered which is an -optimal implementation of *Lanczos* image resizing filter. This class has a -similar programmatic interface to AVIR, but it is not thread-safe: each -executing thread should have its own `CLancIR` object. This class was designed -for cases of batch processing of same-sized frames like in video encoding. - -LANCIR offers up to 200% faster image resizing in comparison to AVIR. The -quality difference is, however, debatable. Note that while LANCIR can take -8- and 16-bit and float image buffers, its precision is limited to 8-bit -resizing. - -LANCIR should be seen as a bonus and as some kind of quality comparison. -LANCIR uses Lanczos filter "a" parameter equal to 3 which is similar to AVIR's -default setting. - -## Change log ## -Version 2.4: - -* Removed outdated `_mm_reset()` function calls from the SIMD code. -* Changed `float4 round()` to use SSE2 rounding features, avoiding use of -64-bit registers. - -Version 2.3: - -* Implemented CLancIR image resizing algorithm. 
-* Fixed a minor image offset on image upsizing. - -Version 2.2: - -* Released AVIR under a permissive MIT license agreement. - -Version 2.1: - -* Fixed error-diffusion dither problems introduced in the previous version. -* Added the `-1` switch to the `imageresize` to enable 1-bit output for -dither's quality evaluation (use together with the `-d` switch). -* Added the `--algparams=` switch to the `imageresize` to control resizing -quality (replaces the `--low-ring` switch). -* Added `avir :: CImageResizerParamsULR` parameter set for lowest-ringing -performance possible (not considerably different to -`avir :: CImageResizerParamsLR`, but a bit lower ringing). - -Version 2.0: - -* Minor inner loop optimizations. -* Lifted the supported image size constraint by switching buffer addressing to -`size_t` from `int`, now image size is limited by the available system memory. -* Added several useful switches to the `imageresize` utility. -* Now `imageresize` does not apply gamma-correction by default. -* Fixed scaling of bit depth-reduction operation. -* Improved error-diffusion dither's signal-to-noise ratio. -* Compiled binaries with AVX2 instruction set (SSE4 for macOS). - -## Users ## -This library is used by: - - * [Contaware.com](http://www.contaware.com/) - -Please drop me a note at aleksey.vaneev@gmail.com and I will include a link to -your software product to the list of users. This list is important at -maintaining confidence in this library among the interested parties. diff --git a/third_party/avir/avir.h b/third_party/avir/avir.h deleted file mode 100644 index e4fb3bdc..00000000 --- a/third_party/avir/avir.h +++ /dev/null @@ -1,17065 +0,0 @@ -/* clang-format off */ -//$ nobt -//$ nocpp - -/** - * @file avir.h - * - * @brief The "main" inclusion file with all required classes and functions. - * - * This is the "main" inclusion file for the "AVIR" image resizer. 
This - * inclusion file contains implementation of the AVIR image resizing algorithm - * in its entirety. Also includes several classes and functions that can be - * useful elsewhere. - * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - * - * @mainpage - * - * @section intro_sec Introduction - * - * Description is available at https://github.com/avaneev/avir - * - * AVIR is devoted to women. Your digital photos can look good at any size! - * - * @section license License - * - * AVIR License Agreement - * - * The MIT License (MIT) - * - * Copyright (c) 2015-2019 Aleksey Vaneev - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Please credit the author of this library in your documentation in the - * following way: "AVIR image resizing algorithm designed by Aleksey Vaneev" - * - * @version 2.4 - */ - -#ifndef AVIR_CIMAGERESIZER_INCLUDED -#define AVIR_CIMAGERESIZER_INCLUDED - -#include "third_party/avir/notice.h" -#include "libc/bits/xmmintrin.internal.h" -#include "libc/str/str.h" -#include "libc/mem/mem.h" -#include "libc/bits/bits.h" -#include "libc/math.h" - -namespace avir { - -/** - * The macro defines AVIR version string. - */ - -#define AVIR_VERSION "2.4" - -/** - * The macro equals to "pi" constant, fills 53-bit floating point mantissa. - * Undefined at the end of file. - */ - -#define AVIR_PI 3.1415926535897932 - -/** - * The macro equals to "pi divided by 2" constant, fills 53-bit floating - * point mantissa. Undefined at the end of file. - */ - -#define AVIR_PId2 1.5707963267948966 - -/** - * Rounding function, based on the (int) typecast. Biased result. Not suitable - * for numbers >= 2^31. - * - * @param d Value to round. - * @return Rounded value. Some bias may be introduced. - */ - -template -inline T round(const T d) { - return (d < 0.0 ? -(T)(int)((T)0.5 - d) : (T)(int)(d + (T)0.5)); -} - -/** - * Template function "clamps" (clips) the specified value so that it is not - * lesser than "minv", and not greater than "maxv". - * - * @param Value Value to clamp. - * @param minv Minimal allowed value. - * @param maxv Maximal allowed value. - * @return The clamped value. - */ - -template -inline T clamp(const T& Value, const T minv, const T maxv) { - if (Value < minv) { - return (minv); - } else if (Value > maxv) { - return (maxv); - } else { - return (Value); - } -} - -/** - * Power 2.4 approximation function, designed for sRGB gamma correction. - * - * @param x Argument, in the range 0.09 to 1. - * @return Value raised into power 2.4, approximate. 
- */ - -template -inline T pow24_sRGB(const T x) { - const double x2 = x * x; - const double x3 = x2 * x; - const double x4 = x2 * x2; - - return ((T)(0.0985766365536824 + 0.839474952656502 * x2 + - 0.363287814061725 * x3 - - 0.0125559718896615 / (0.12758338921578 + 0.290283465468235 * x) - - 0.231757513261358 * x - 0.0395365717969074 * x4)); -} - -/** - * Power 1/2.4 approximation function, designed for sRGB gamma correction. - * - * @param x Argument, in the range 0.003 to 1. - * @return Value raised into power 1/2.4, approximate. - */ - -template -inline T pow24i_sRGB(const T x) { - const double sx = sqrt(x); - const double ssx = sqrt(sx); - const double sssx = sqrt(ssx); - - return ((T)(0.000213364515060263 + 0.0149409239419218 * x + - 0.433973412731747 * sx + - ssx * (0.659628181609715 * sssx - 0.0380957908841466 - - 0.0706476137208521 * sx))); -} - -/** - * Function approximately linearizes the sRGB gamma value. - * - * @param s sRGB gamma value, in the range 0 to 1. - * @return Linearized sRGB gamma value, approximated. - */ - -template -inline T convertSRGB2Lin(const T s) { - const T a = (T)0.055; - - if (s <= (T)0.04045) { - return (s / (T)12.92); - } - - return (pow24_sRGB((s + a) / ((T)1 + a))); -} - -/** - * Function approximately de-linearizes the linear gamma value. - * - * @param s Linear gamma value, in the range 0 to 1. - * @return sRGB gamma value, approximated. - */ - -template -inline T convertLin2SRGB(const T s) { - const T a = (T)0.055; - - if (s <= (T)0.0031308) { - return ((T)12.92 * s); - } - - return (((T)1 + a) * pow24i_sRGB(s) - a); -} - -/** - * Function converts (via typecast) specified array of type T1 values of - * length l into array of type T2 values. If T1 is the same as T2, copy - * operation is performed. When copying data at overlapping address spaces, - * "op" should be lower than "ip". - * - * @param ip Input buffer. - * @param[out] op Output buffer. - * @param l The number of elements to copy. 
- * @param ip Input buffer pointer increment. - * @param op Output buffer pointer increment. - */ - -template -inline void copyArray(const T1* ip, T2* op, int l, const int ipinc = 1, - const int opinc = 1) { - while (l > 0) { - *op = (T2)*ip; - op += opinc; - ip += ipinc; - l--; - } -} - -/** - * Function adds values located in array "ip" to array "op". - * - * @param ip Input buffer. - * @param[out] op Output buffer. - * @param l The number of elements to add. - * @param ip Input buffer pointer increment. - * @param op Output buffer pointer increment. - */ - -template -inline void addArray(const T1* ip, T2* op, int l, const int ipinc = 1, - const int opinc = 1) { - while (l > 0) { - *op += *ip; - op += opinc; - ip += ipinc; - l--; - } -} - -/** - * Function that replicates a set of adjacent elements several times in a row. - * This operation is usually used to replicate pixels at the start or end of - * image's scanline. - * - * @param ip Source array. - * @param ipl Source array length (usually 1..4, but can be any number). - * @param[out] op Destination buffer. - * @param l Number of times the source array should be replicated (the - * destination buffer should be able to hold ipl * l number of elements). - * @param opinc Destination buffer position increment after replicating the - * source array. This value should be equal to at least ipl. 
- */ - -template -inline void replicateArray(const T1* const ip, const int ipl, T2* op, int l, - const int opinc) { - if (ipl == 1) { - while (l > 0) { - op[0] = ip[0]; - op += opinc; - l--; - } - } else if (ipl == 4) { - while (l > 0) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - op[3] = ip[3]; - op += opinc; - l--; - } - } else if (ipl == 3) { - while (l > 0) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - op += opinc; - l--; - } - } else if (ipl == 2) { - while (l > 0) { - op[0] = ip[0]; - op[1] = ip[1]; - op += opinc; - l--; - } - } else { - while (l > 0) { - int i; - - for (i = 0; i < ipl; i++) { - op[i] = ip[i]; - } - - op += opinc; - l--; - } - } -} - -/** - * Function calculates frequency response of the specified FIR filter at the - * specified circular frequency. Phase can be calculated as atan2( im, re ). - * Function uses computationally-efficient oscillators instead of "cos" and - * "sin" functions. - * - * @param flt FIR filter's coefficients. - * @param fltlen Number of coefficients (taps) in the filter. - * @param th Circular frequency [0; pi]. - * @param[out] re0 Resulting real part of the complex frequency response. - * @param[out] im0 Resulting imaginary part of the complex frequency response. - * @param fltlat Filter's latency in samples (taps). 
- */ - -template -inline void calcFIRFilterResponse(const T* flt, int fltlen, const double th, - double& re0, double& im0, - const int fltlat = 0) { - const double sincr = 2.0 * cos(th); - double cvalue1; - double svalue1; - - if (fltlat == 0) { - cvalue1 = 1.0; - svalue1 = 0.0; - } else { - cvalue1 = cos(-fltlat * th); - svalue1 = sin(-fltlat * th); - } - - double cvalue2 = cos(-(fltlat + 1) * th); - double svalue2 = sin(-(fltlat + 1) * th); - - double re = 0.0; - double im = 0.0; - - while (fltlen > 0) { - re += cvalue1 * flt[0]; - im += svalue1 * flt[0]; - flt++; - fltlen--; - - double tmp = cvalue1; - cvalue1 = sincr * cvalue1 - cvalue2; - cvalue2 = tmp; - - tmp = svalue1; - svalue1 = sincr * svalue1 - svalue2; - svalue2 = tmp; - } - - re0 = re; - im0 = im; -} - -/** - * Function normalizes FIR filter so that its frequency response at DC is - * equal to DCGain. - * - * @param[in,out] p Filter coefficients. - * @param l Filter length. - * @param DCGain Filter's gain at DC. - * @param pstep "p" array step. - */ - -template -inline void normalizeFIRFilter(T* const p, const int l, const double DCGain, - const int pstep = 1) { - double s = 0.0; - T* pp = p; - int i = l; - - while (i > 0) { - s += *pp; - pp += pstep; - i--; - } - - s = DCGain / s; - pp = p; - i = l; - - while (i > 0) { - *pp = (T)(*pp * s); - pp += pstep; - i--; - } -} - -/** - * @brief Memory buffer class for element array storage, with capacity - * tracking. - * - * Allows easier handling of memory blocks allocation and automatic - * deallocation for arrays (buffers) consisting of elements of specified - * class. Tracks buffer's capacity in "int" variable; unsuitable for - * allocation of very large memory blocks (with more than 2 billion elements). - * - * This class manages memory space only - it does not perform element class - * construction (initialization) operations. Buffer's required memory address - * alignment specification is supported. 
- * - * Uses standard library to allocate and deallocate memory. - * - * @tparam T Buffer element's type. - * @tparam capint Buffer capacity's type to use. Use size_t for large buffers. - */ - -template -class CBuffer { - public: - CBuffer() : Data(NULL), DataAligned(NULL), Capacity(0), Alignment(0) {} - - /** - * Constructor creates the buffer with the specified capacity. - * - * @param aCapacity Buffer's capacity. - * @param aAlignment Buffer's required memory address alignment. 0 - use - * stdlib's default alignment. - */ - - CBuffer(const capint aCapacity, const int aAlignment = 0) { - allocinit(aCapacity, aAlignment); - } - - CBuffer(const CBuffer& Source) { - allocinit(Source.Capacity, Source.Alignment); - memcpy(DataAligned, Source.DataAligned, Capacity * sizeof(T)); - } - - ~CBuffer() { freeData(); } - - CBuffer& operator=(const CBuffer& Source) { - alloc(Source.Capacity, Source.Alignment); - memcpy(DataAligned, Source.DataAligned, Capacity * sizeof(T)); - return (*this); - } - - /** - * Function allocates memory so that the specified number of elements - * can be stored in *this buffer object. - * - * @param aCapacity Storage for this number of elements to allocate. - * @param aAlignment Buffer's required memory address alignment, - * power-of-2 values only. 0 - use stdlib's default alignment. - */ - - void alloc(const capint aCapacity, const int aAlignment = 0) { - freeData(); - allocinit(aCapacity, aAlignment); - } - - /** - * Function deallocates any previously allocated buffer. - */ - - void free() { - freeData(); - Data = NULL; - DataAligned = NULL; - Capacity = 0; - Alignment = 0; - } - - /** - * @return The capacity of the element buffer. - */ - - capint getCapacity() const { return (Capacity); } - - /** - * Function "forces" *this buffer to have an arbitary capacity. Calling - * this function invalidates all further operations except deleting *this - * object. This function should not be usually used at all. 
Function can - * be used to "model" certain buffer capacity without calling a costly - * memory allocation function. - * - * @param NewCapacity A new "forced" capacity. - */ - - void forceCapacity(const capint NewCapacity) { Capacity = NewCapacity; } - - /** - * Function reallocates *this buffer to a larger size so that it will be - * able to hold the specified number of elements. Downsizing is not - * performed. Alignment is not changed. - * - * @param NewCapacity New (increased) capacity. - * @param DoDataCopy "True" if data in the buffer should be retained. - */ - - void increaseCapacity(const capint NewCapacity, - const bool DoDataCopy = true) { - if (NewCapacity < Capacity) { - return; - } - - if (DoDataCopy) { - const capint PrevCapacity = Capacity; - T* const PrevData = Data; - T* const PrevDataAligned = DataAligned; - - allocinit(NewCapacity, Alignment); - memcpy(DataAligned, PrevDataAligned, PrevCapacity * sizeof(T)); - - ::free(PrevData); - } else { - ::free(Data); - allocinit(NewCapacity, Alignment); - } - } - - /** - * Function "truncates" (reduces) capacity of the buffer without - * reallocating it. Alignment is not changed. - * - * @param NewCapacity New required capacity. - */ - - void truncateCapacity(const capint NewCapacity) { - if (NewCapacity >= Capacity) { - return; - } - - Capacity = NewCapacity; - } - - /** - * Function increases capacity so that the specified number of - * elements can be stored. This function increases the previous capacity - * value by third the current capacity value until space for the required - * number of elements is available. Alignment is not changed. - * - * @param ReqCapacity Required capacity. 
- */ - - void updateCapacity(const capint ReqCapacity) { - if (ReqCapacity <= Capacity) { - return; - } - - capint NewCapacity = Capacity; - - while (NewCapacity < ReqCapacity) { - NewCapacity += NewCapacity / 3 + 1; - } - - increaseCapacity(NewCapacity); - } - - operator T*() const { return (DataAligned); } - - private: - T* Data; ///< Element buffer pointer. - ///< - T* DataAligned; ///< Memory address-aligned element buffer pointer. - ///< - capint Capacity; ///< Element buffer capacity. - ///< - int Alignment; ///< Memory address alignment in use. 0 - use stdlib's - ///< default alignment. - ///< - - /** - * Internal element buffer allocation function used during object - * construction. - * - * @param aCapacity Storage for this number of elements to allocate. - * @param aAlignment Buffer's required memory address alignment. 0 - use - * stdlib's default alignment. - */ - - void allocinit(const capint aCapacity, const int aAlignment) { - if (aAlignment == 0) { - Data = (T*)::malloc(aCapacity * sizeof(T)); - DataAligned = Data; - Alignment = 0; - } else { - Data = (T*)::malloc(aCapacity * sizeof(T) + aAlignment); - DataAligned = alignptr(Data, aAlignment); - Alignment = aAlignment; - } - - Capacity = aCapacity; - } - - /** - * Function frees a previously allocated Data buffer. - */ - - void freeData() { ::free(Data); } - - /** - * Function modifies the specified pointer so that it becomes memory - * address-aligned. - * - * @param ptr Pointer to align. - * @param align Alignment in bytes to apply. - * @return Pointer aligned to align bytes. Works with power-of-2 - * alignments only. If no alignment is necessary, "align" bytes will be - * added to the pointer value. 
- */ - - template - inline Tp alignptr(const Tp ptr, const uintptr_t align) { - return ((Tp)((uintptr_t)ptr + align - ((uintptr_t)ptr & (align - 1)))); - } -}; - -/** - * Function optimizes the length of the symmetric-odd FIR filter by removing - * left- and rightmost elements that are below specific threshold. - * - * Synthetic test shows that filter gets optimized in 2..3% of cases and in - * each such case optimization reduces filter length by 6..8%. Optimization, - * however, may skew the results of algorithm modeling and complexity - * calculation leading to a choice of a less optimal algorithm. - * - * @param[in,out] Flt Buffer that contains filter being optimized. - * @param[in,out] FltLatency Variable that holds the current latency of the - * filter. May be adjusted on function return. - * @param Threshold Threshold level. - */ - -template -inline void optimizeFIRFilter(CBuffer& Flt, int& FltLatency, - T const Threshold = (T)0.00001) { - int i; - - // Optimize length. - - for (i = 0; i <= FltLatency; i++) { - if (fabs(Flt[i]) >= Threshold || i == FltLatency) { - if (i > 0) { - const int NewCapacity = Flt.getCapacity() - i * 2; - copyArray(&Flt[i], &Flt[0], NewCapacity); - Flt.truncateCapacity(NewCapacity); - FltLatency -= i; - } - - break; - } - } -} - -/** - * @brief Array of structured objects. - * - * Implements allocation of a linear array of objects of class T (which are - * initialized), addressable via operator[]. Each object is created via the - * "operator new". New object insertions are quick since implementation uses - * prior space allocation (capacity), thus not requiring frequent memory block - * reallocations. - * - * @tparam T Array element's type. 
- */ - -template -class CStructArray { - public: - CStructArray() : ItemCount(0) {} - - CStructArray(const CStructArray& Source) - : ItemCount(0), Items(Source.getItemCount()) { - while (ItemCount < Source.getItemCount()) { - Items[ItemCount] = new T(Source[ItemCount]); - ItemCount++; - } - } - - ~CStructArray() { clear(); } - - CStructArray& operator=(const CStructArray& Source) { - clear(); - - const int NewCount = Source.ItemCount; - Items.updateCapacity(NewCount); - - while (ItemCount < NewCount) { - Items[ItemCount] = new T(Source[ItemCount]); - ItemCount++; - } - - return (*this); - } - - T& operator[](const int Index) { return (*Items[Index]); } - - const T& operator[](const int Index) const { return (*Items[Index]); } - - /** - * Function creates a new object of type T with the default constructor - * and adds this object to the array. - * - * @return Reference to a newly added object. - */ - - T& add() { - if (ItemCount == Items.getCapacity()) { - Items.increaseCapacity(ItemCount * 3 / 2 + 1); - } - - Items[ItemCount] = new T(); - ItemCount++; - - return ((*this)[ItemCount - 1]); - } - - /** - * Function changes number of allocated items. New items are created with - * the default constructor. If NewCount is below the current item count, - * items that are above NewCount range will be destructed. - * - * @param NewCount New requested item count. - */ - - void setItemCount(const int NewCount) { - if (NewCount > ItemCount) { - Items.increaseCapacity(NewCount); - - while (ItemCount < NewCount) { - Items[ItemCount] = new T(); - ItemCount++; - } - } else { - while (ItemCount > NewCount) { - ItemCount--; - delete Items[ItemCount]; - } - } - } - - /** - * Function erases all items of *this array. - */ - - void clear() { - while (ItemCount > 0) { - ItemCount--; - delete Items[ItemCount]; - } - } - - /** - * @return The number of allocated items. 
- */ - - int getItemCount() const { return (ItemCount); } - - private: - int ItemCount; ///< The number of items available in the array. - ///< - CBuffer Items; ///< Element buffer. - ///< -}; - -/** - * @brief Sine signal generator class. - * - * Class implements sine signal generator without biasing, with - * constructor-based initalization only. This generator uses oscillator - * instead of "sin" function. - */ - -class CSineGen { - public: - /** - * Constructor initializes *this sine signal generator. - * - * @param si Sine function increment, in radians. - * @param ph Starting phase, in radians. Add 0.5 * AVIR_PI for cosine - * function. - */ - - CSineGen(const double si, const double ph) - : svalue1(sin(ph)), svalue2(sin(ph - si)), sincr(2.0 * cos(si)) {} - - /** - * @return The next value of the sine function, without biasing. - */ - - double generate() { - const double res = svalue1; - - svalue1 = sincr * res - svalue2; - svalue2 = res; - - return (res); - } - - private: - double svalue1; ///< Current sine value. - ///< - double svalue2; ///< Previous sine value. - ///< - double sincr; ///< Sine value increment. - ///< -}; - -/** - * @brief Peaked Cosine window function generator class. - * - * Class implements Peaked Cosine window function generator. Generates the - * right-handed half of the window function. The Alpha parameter of this - * window function offers the control of the balance between the early and - * later taps of the filter. E.g. at Alpha=1 both early and later taps are - * attenuated, but at Alpha=4 mostly later taps are attenuated. This offers a - * great control over ringing artifacts produced by a low-pass filter in image - * processing, without compromising achieved image sharpness. - */ - -class CDSPWindowGenPeakedCosine { - public: - /** - * Constructor initializes *this window function generator. - * - * @param aAlpha Alpha parameter, affects the peak shape (peak - * augmentation) of the window function. Should be >= 1.0. 
- * @param aLen2 Half filter's length (non-truncated). - */ - - CDSPWindowGenPeakedCosine(const double aAlpha, const double aLen2) - : Alpha(aAlpha), - Len2(aLen2), - wn(0), - w1(AVIR_PId2 / Len2, AVIR_PI * 0.5) {} - - /** - * @return The next Peaked Cosine window function coefficient. - */ - - double generate() { - const double h = pow(wn / Len2, Alpha); - wn++; - - return (w1.generate() * (1.0 - h)); - } - - private: - double Alpha; ///< Alpha parameter, affects the peak shape of window. - ///< - double Len2; ///< Half length of the window function. - ///< - int wn; ///< Window function integer position. 0 - center of the - ///< window function. - ///< - CSineGen w1; ///< Sine-wave generator. - ///< -}; - -/** - * @brief FIR filter-based equalizer generator. - * - * Class implements an object used to generate symmetric-odd FIR filters with - * the specified frequency response (aka paragraphic equalizer). The - * calculated filter is windowed by the Peaked Cosine window function. - * - * In image processing, due to short length of filters being used (6-8 taps) - * the resulting frequency response of the filter is approximate and may be - * mathematically imperfect, but still adequate to the visual requirements. - * - * On a side note, this equalizer generator can be successfully used for audio - * signal equalization as well: for example, it is used in almost the same - * form in Voxengo Marvel GEQ equalizer plug-in. - * - * Filter generation is based on decomposition of frequency range into - * spectral bands, with each band represented by linear and ramp "kernels". - * When the filter is built, these kernels are combined together with - * different weights that approximate the required frequency response. - */ - -class CDSPFIREQ { - public: - /** - * Function initializes *this object with the required parameters. The - * gain of frequencies beyond the MinFreq..MaxFreq range are controlled by - * the first and the last band's gain. 
- * - * @param SampleRate Processing sample rate (use 2 for image processing). - * @param aFilterLength Required filter length in samples (taps). The - * actual filter length is truncated to an integer value. - * @param aBandCount Number of band crossover points required to control, - * including bands at MinFreq and MaxFreq. - * @param MinFreq Minimal frequency that should be controlled. - * @param MaxFreq Maximal frequency that should be controlled. - * @param IsLogBands "True" if the bands should be spaced logarithmically. - * @param WFAlpha Peaked Cosine window function's Alpha parameter. - */ - - void init(const double SampleRate, const double aFilterLength, - const int aBandCount, const double MinFreq, const double MaxFreq, - const bool IsLogBands, const double WFAlpha) { - FilterLength = aFilterLength; - BandCount = aBandCount; - - CenterFreqs.alloc(BandCount); - - z = (int)ceil(FilterLength * 0.5); - zi = z + (z & 1); - z2 = z * 2; - - CBuffer oscbuf(z2); - initOscBuf(oscbuf); - - CBuffer winbuf(z); - initWinBuf(winbuf, WFAlpha); - - UseFirstVirtBand = (MinFreq > 0.0); - const int k = zi * (BandCount + (UseFirstVirtBand ? 1 : 0)); - Kernels1.alloc(k); - Kernels2.alloc(k); - - double m; // Frequency step multiplier. - double mo; // Frequency step offset (addition). 
- - if (IsLogBands) { - m = exp(log(MaxFreq / MinFreq) / (BandCount - 1)); - mo = 0.0; - } else { - m = 1.0; - mo = (MaxFreq - MinFreq) / (BandCount - 1); - } - - double f = MinFreq; - double x1 = 0.0; - double x2; - int si; - - if (UseFirstVirtBand) { - si = 0; - } else { - si = 1; - CenterFreqs[0] = 0.0; - f = f * m + mo; - } - - double* kernbuf1 = &Kernels1[0]; - double* kernbuf2 = &Kernels2[0]; - int i; - - for (i = si; i < BandCount; i++) { - x2 = f * 2.0 / SampleRate; - CenterFreqs[i] = x2; - - fillBandKernel(x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - f = f * m + mo; - } - - if (x1 < 1.0) { - UseLastVirtBand = true; - fillBandKernel(x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf); - } else { - UseLastVirtBand = false; - } - } - - /** - * @return Filter's length, in samples (taps). - */ - - int getFilterLength() const { return (z2 - 1); } - - /** - * @return Filter's latency (group delay), in samples (taps). - */ - - int getFilterLatency() const { return (z - 1); } - - /** - * Function creates symmetric-odd FIR filter with the specified gain - * levels at band crossover points. - * - * @param BandGains Array of linear gain levels, count=BandCount specified - * in the init() function. - * @param[out] Filter Output filter buffer, length = getFilterLength(). 
- */ - - void buildFilter(const double* const BandGains, double* const Filter) { - const double* kernbuf1 = &Kernels1[0]; - const double* kernbuf2 = &Kernels2[0]; - double x1 = 0.0; - double y1 = BandGains[0]; - double x2; - double y2; - - int i; - int si; - - if (UseFirstVirtBand) { - si = 1; - x2 = CenterFreqs[0]; - y2 = y1; - } else { - si = 2; - x2 = CenterFreqs[1]; - y2 = BandGains[1]; - } - - copyBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - y1 = y2; - - for (i = si; i < BandCount; i++) { - x2 = CenterFreqs[i]; - y2 = BandGains[i]; - - addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - y1 = y2; - } - - if (UseLastVirtBand) { - addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - y1); - } - - for (i = 0; i < z - 1; i++) { - Filter[z + i] = Filter[z - 2 - i]; - } - } - - /** - * Function calculates filter's length (in samples) and latency depending - * on the required non-truncated filter length. - * - * @param aFilterLength Required filter length in samples (non-truncated). - * @param[out] Latency Resulting latency (group delay) of the filter, - * in samples (taps). - * @return Filter length in samples (taps). - */ - - static int calcFilterLength(const double aFilterLength, int& Latency) { - const int l = (int)ceil(aFilterLength * 0.5); - Latency = l - 1; - - return (l * 2 - 1); - } - - private: - double FilterLength; ///< Length of filter. - ///< - int z; ///< Equals (int) ceil( FilterLength * 0.5 ). - ///< - int zi; ///< Equals "z" if z is even, or z + 1 if z is odd. Used as a - ///< Kernels1 and Kernels2 size multiplier and kernel buffer - ///< increment to make sure each kernel buffer is 16-byte aligned. - ///< - int z2; ///< Equals z * 2. - ///< - int BandCount; ///< Number of controllable bands. - ///< - CBuffer CenterFreqs; ///< Center frequencies for all bands, - ///< normalized to 0.0-1.0 range. 
- ///< - CBuffer Kernels1; ///< Half-length kernel buffers for each - ///< spectral band (linear part). - ///< - CBuffer Kernels2; ///< Half-length kernel buffers for each - ///< spectral band (ramp part). - ///< - bool UseFirstVirtBand; ///< "True" if the first virtual band - ///< (between 0.0 and MinFreq) should be used. The - ///< first virtual band won't be used if MinFreq - ///< equals 0.0. - ///< - bool UseLastVirtBand; ///< "True" if the last virtual band (between - ///< MaxFreq and SampleRate * 0.5) should be used. The - ///< last virtual band won't be used if MaxFreq * 2.0 - ///< equals SampleRate. - ///< - - /** - * Function initializes the "oscbuf" used in the fillBandKernel() - * function. - * - * @param oscbuf Oscillator buffer, length = z * 2. - */ - - void initOscBuf(double* oscbuf) const { - int i = z; - - while (i > 0) { - oscbuf[0] = 0.0; - oscbuf[1] = 1.0; - oscbuf += 2; - i--; - } - } - - /** - * Function initializes window function buffer. This function generates - * Peaked Cosine window function. - * - * @param winbuf Windowing buffer. - * @param Alpha Peaked Cosine alpha parameter. - */ - - void initWinBuf(double* winbuf, const double Alpha) const { - CDSPWindowGenPeakedCosine wf(Alpha, FilterLength * 0.5); - int i; - - for (i = 1; i <= z; i++) { - winbuf[z - i] = wf.generate(); - } - } - - /** - * Function fills first half of symmetric-odd FIR kernel for the band. - * This function should be called successively for adjacent bands. - * Previous band's x2 should be equal to current band's x1. A band kernel - * consists of 2 elements: linear kernel and ramp kernel. - * - * @param x1 Band's left corner frequency (0..1). - * @param x2 Band's right corner frequency (0..1). - * @param kernbuf1 Band kernel buffer 1 (linear part), length = z. - * @param kernbuf2 Band kernel buffer 2 (ramp part), length = z. - * @param oscbuf Oscillation buffer. 
Before the first call of the - * fillBandKernel() should be initialized with the call of the - * initOscBuf() function. - * @param winbuf Buffer that contains windowing function. - */ - - void fillBandKernel(const double x1, const double x2, double* kernbuf1, - double* kernbuf2, double* oscbuf, - const double* const winbuf) { - const double s2_incr = AVIR_PI * x2; - const double s2_coeff = 2.0 * cos(s2_incr); - - double s2_value1 = sin(s2_incr * (-z + 1)); - double c2_value1 = sin(s2_incr * (-z + 1) + AVIR_PI * 0.5); - oscbuf[0] = sin(s2_incr * -z); - oscbuf[1] = sin(s2_incr * -z + AVIR_PI * 0.5); - - int ks; - - for (ks = 1; ks < z; ks++) { - const int ks2 = ks * 2; - const double s1_value1 = oscbuf[ks2]; - const double c1_value1 = oscbuf[ks2 + 1]; - oscbuf[ks2] = s2_value1; - oscbuf[ks2 + 1] = c2_value1; - - const double x = AVIR_PI * (ks - z); - const double v0 = winbuf[ks - 1] / ((x1 - x2) * x); - - kernbuf1[ks - 1] = - (x2 * s2_value1 - x1 * s1_value1 + (c2_value1 - c1_value1) / x) * v0; - - kernbuf2[ks - 1] = (s2_value1 - s1_value1) * v0; - - s2_value1 = s2_coeff * s2_value1 - oscbuf[ks2 - 2]; - c2_value1 = s2_coeff * c2_value1 - oscbuf[ks2 - 1]; - } - - kernbuf1[z - 1] = (x2 * x2 - x1 * x1) / (x1 - x2) * 0.5; - kernbuf2[z - 1] = -1.0; - } - - /** - * Function copies band kernel's elements to the output buffer. - * - * @param outbuf Output buffer. - * @param kernbuf1 Kernel buffer 1 (linear part). - * @param kernbuf2 Kernel buffer 2 (ramp part). - * @param c Multiplier for linear kernel element. - * @param d Multiplier for ramp kernel element. - */ - - void copyBandKernel(double* outbuf, const double* const kernbuf1, - const double* const kernbuf2, const double c, - const double d) const { - int ks; - - for (ks = 0; ks < z; ks++) { - outbuf[ks] = c * kernbuf1[ks] + d * kernbuf2[ks]; - } - } - - /** - * Function adds band kernel's elements to the output buffer. - * - * @param outbuf Output buffer. - * @param kernbuf1 Kernel buffer 1 (linear part). 
- * @param kernbuf2 Kernel buffer 2 (ramp part). - * @param c Multiplier for linear kernel element. - * @param d Multiplier for ramp kernel element. - */ - - void addBandKernel(double* outbuf, const double* const kernbuf1, - const double* const kernbuf2, const double c, - const double d) const { - int ks; - - for (ks = 0; ks < z; ks++) { - outbuf[ks] += c * kernbuf1[ks] + d * kernbuf2[ks]; - } - } -}; - -/** - * @brief Low-pass filter windowed by Peaked Cosine window function. - * - * This class implements calculation of linear-phase symmetric-odd FIR - * low-pass filter windowed by the Peaked Cosine window function, for image - * processing applications. - */ - -class CDSPPeakedCosineLPF { - public: - int fl2; ///< Half filter's length, excluding the peak value. This value - ///< can be also used as filter's latency in samples (taps). - ///< - int FilterLen; ///< Filter's length in samples (taps). - ///< - - /** - * Constructor initalizes *this object. - * - * @param aLen2 Half-length (non-truncated) of low-pass filter, in samples - * (taps). - * @param aFreq2 Low-pass filter's corner frequency [0; pi]. - * @param aAlpha Peaked Cosine window function Alpha parameter. - */ - - CDSPPeakedCosineLPF(const double aLen2, const double aFreq2, - const double aAlpha) - : fl2((int)ceil(aLen2) - 1), - FilterLen(fl2 + fl2 + 1), - Len2(aLen2), - Freq2(aFreq2), - Alpha(aAlpha) {} - - /** - * Function generates a linear-phase low-pass filter windowed by Peaked - * Cosine window function. - * - * @param[out] op Output buffer, length = FilterLen (fl2 * 2 + 1). - * @param DCGain Required gain at DC. The resulting filter will be - * normalized to achieve this DC gain. 
- */ - - template - void generateLPF(T* op, const double DCGain) { - CDSPWindowGenPeakedCosine wf(Alpha, Len2); - CSineGen f2(Freq2, 0.0); - - op += fl2; - T* op2 = op; - f2.generate(); - int t = 1; - - *op = (T)(Freq2 * wf.generate() / AVIR_PI); - double s = *op; - - while (t <= fl2) { - const double v = f2.generate() * wf.generate() / t / AVIR_PI; - op++; - op2--; - *op = (T)v; - *op2 = (T)v; - s += *op + *op2; - t++; - } - - t = FilterLen; - s = DCGain / s; - - while (t > 0) { - *op2 = (T)(*op2 * s); - op2++; - t--; - } - } - - private: - double Len2; ///< Half-length (non-truncated) of low-pass filter, in - ///< samples (taps). - ///< - double Freq2; ///< Low-pass filter's corner frequency. - ///< - double Alpha; ///< Peaked Cosine window function Alpha parameter. - ///< -}; - -/** - * @brief Buffer class for parametrized low-pass filter. - * - * This class extends the CBuffer< double > class by adding several variables - * that define a symmetric-odd FIR low-pass filter windowed by Peaked Cosine - * window function. This class can be used to compare filters without - * comparing their buffer contents. - */ - -class CFltBuffer : public CBuffer { - public: - double Len2; ///< Half-length (non-truncated) of low-pass filters, in - ///< samples (taps). - ///< - double Freq; ///< Low-pass filter's corner frequency. - ///< - double Alpha; ///< Peaked Cosine window function Alpha parameter. - ///< - double DCGain; ///< DC gain applied to the filter. - ///< - - CFltBuffer() - : CBuffer(), Len2(0.0), Freq(0.0), Alpha(0.0), DCGain(0.0) {} - - /** - * @param b2 Filter buffer to compare *this object to. - * @return Operator returns "true" if both filters have same parameters. - */ - - bool operator==(const CFltBuffer& b2) const { - return (Len2 == b2.Len2 && Freq == b2.Freq && Alpha == b2.Alpha && - DCGain == b2.DCGain); - } -}; - -/** - * @brief Sinc function-based fractional delay filter bank. 
- * - * Class implements storage and initialization of a bank of sinc - * function-based fractional delay filters, expressed as 1st order polynomial - * interpolation coefficients. The filters are produced from a single "long" - * windowed low-pass filter. Also supports 0th-order ("nearest neighbor") - * interpolation. - * - * This class also supports multiplication of each fractional delay filter by - * an external filter (usually a low-pass filter). - * - * @tparam fptype Specifies storage type of the filter coefficients bank. The - * filters are initially calculated using the "double" precision. - */ - -template -class CDSPFracFilterBankLin { - public: - CDSPFracFilterBankLin() : Order(-1) {} - - /** - * Copy constructor copies a limited set of parameters of the source - * filter bank. The actual filters are not copied. Such copying is used - * during filtering steps "modeling" stage. A further init() function - * call is required. - * - * @param s Source filter bank. - */ - - void copyInitParams(const CDSPFracFilterBankLin& s) { - WFLen2 = s.WFLen2; - WFFreq = s.WFFreq; - WFAlpha = s.WFAlpha; - FracCount = s.FracCount; - Order = s.Order; - Alignment = s.Alignment; - SrcFilterLen = s.SrcFilterLen; - FilterLen = s.FilterLen; - FilterSize = s.FilterSize; - IsSrcTableBuilt = false; - ExtFilter = s.ExtFilter; - TableFillFlags.alloc(s.TableFillFlags.getCapacity()); - int i; - - // Copy table fill flags, but shifted so that further initialization - // is still possible (such feature should not be used, though). - - for (i = 0; i < TableFillFlags.getCapacity(); i++) { - TableFillFlags[i] = (uint8_t)(s.TableFillFlags[i] << 2); - } - } - - /** - * Operator compares *this filter bank and another filter bank and returns - * "true" if their parameters are equal. Alignment is not taken into - * account. - * - * @param s Filter bank to compare to. - * @return "True" if compared banks have equal parameters. 
- */ - - bool operator==(const CDSPFracFilterBankLin& s) const { - return (Order == s.Order && WFLen2 == s.WFLen2 && WFFreq == s.WFFreq && - WFAlpha == s.WFAlpha && FracCount == s.FracCount && - ExtFilter == s.ExtFilter); - } - - /** - * Function initializes (builds) the filter bank based on the supplied - * parameters. If the supplied parameters are equal to previously defined - * parameters, function does nothing (alignment is assumed to be never - * changing between the init() function calls). - * - * @param ReqFracCount Required number of fractional delays in the filter - * bank. The minimal value is 2. - * @param ReqOrder Required order of the interpolation polynomial - * (0 or 1). - * @param BaseLen Low-pass filter's base length, in samples (taps). - * Affects the actual length of the filter and its overall steepness. - * @param Cutoff Low-pass filter's normalized cutoff frequency [0; 1]. - * @param aWFAlpha Peaked Cosine window function's Alpha parameter. - * @param aExtFilter External filter to apply to each fractional delay - * filter. - * @param aAlignment Memory alignment of the filter bank, power-of-2 - * value. 0 - use default stdlib alignment. - * @param FltLenAlign Filter's length alignment, power-of-2 value. 
- */ - - void init(const int ReqFracCount, const int ReqOrder, const double BaseLen, - const double Cutoff, const double aWFAlpha, - const CFltBuffer& aExtFilter, const int aAlignment = 0, - const int FltLenAlign = 1) { - double NewWFLen2 = 0.5 * BaseLen * ReqFracCount; - double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount; - double NewWFAlpha = aWFAlpha; - - if (ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq && - NewWFAlpha == WFAlpha && ReqFracCount == FracCount && - aExtFilter == ExtFilter) { - IsInitRequired = false; - return; - } - - WFLen2 = NewWFLen2; - WFFreq = NewWFFreq; - WFAlpha = NewWFAlpha; - FracCount = ReqFracCount; - Order = ReqOrder; - Alignment = aAlignment; - ExtFilter = aExtFilter; - - CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha); - SrcFilterLen = (p.fl2 / ReqFracCount + 1) * 2; - - const int ElementSize = ReqOrder + 1; - FilterLen = SrcFilterLen; - - if (ExtFilter.getCapacity() > 0) { - FilterLen += ExtFilter.getCapacity() - 1; - } - - FilterLen = (FilterLen + FltLenAlign - 1) & ~(FltLenAlign - 1); - FilterSize = FilterLen * ElementSize; - IsSrcTableBuilt = false; - IsInitRequired = true; - } - - /** - * @return The length of each fractional delay filter, in samples (taps). - * Always an even value. - */ - - int getFilterLen() const { return (FilterLen); } - - /** - * @return The number of fractional filters in use by *this bank. - */ - - int getFracCount() const { return (FracCount); } - - /** - * @return The order of the interpolation polynomial. - */ - - int getOrder() const { return (Order); } - - /** - * Function returns the pointer to the specified interpolation table - * filter. - * - * @param i Filter (fractional delay) index, in the range 0 to - * ReqFracCount - 1, inclusive. - * @return Pointer to filter. Higher order polynomial coefficients are - * stored after after previous order coefficients, separated by FilterLen - * elements. 
- */ - - const fptype* getFilter(const int i) { - if (!IsSrcTableBuilt) { - buildSrcTable(); - } - - fptype* const Res = &Table[i * FilterSize]; - - if ((TableFillFlags[i] & 2) == 0) { - createFilter(i); - TableFillFlags[i] |= 2; - - if (Order > 0) { - createFilter(i + 1); - const fptype* const Res2 = Res + FilterSize; - fptype* const op = Res + FilterLen; - int j; - - // Create higher-order interpolation coefficients (linear - // interpolation). - - for (j = 0; j < FilterLen; j++) { - op[j] = Res2[j] - Res[j]; - } - } - } - - return (Res); - } - - /** - * Function makes sure all fractional delay filters were created. - */ - - void createAllFilters() { - int i; - - for (i = 0; i < FracCount; i++) { - getFilter(i); - } - } - - /** - * Function returns an approximate initialization complexity, expressed in - * the number of multiply-add operations. This includes fractional delay - * filters calculation and multiplication by an external filter. This - * function can only be called after the init() function. - * - * @param FracUseMap Fractional delays use map, each element corresponds - * to a single fractional delay, will be compared to the internal table - * fill flags. This map should include 0 and 1 values only. - * @return The complexity of the initialization, expressed in the number - * of multiply-add operations. - */ - - int calcInitComplexity(const CBuffer& FracUseMap) const { - const int FltInitCost = 65; // Cost to initialize a single sample - // of the fractional delay filter. - const int FltUseCost = - FilterLen * Order + - SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single - // fractional delay filter. - const int ucb[2] = {0, FltUseCost}; - int ic; - int i; - - if (IsInitRequired) { - ic = FracCount * SrcFilterLen * FltInitCost; - - for (i = 0; i < FracCount; i++) { - ic += ucb[FracUseMap[i]]; - } - } else { - ic = 0; - - for (i = 0; i < FracCount; i++) { - if (FracUseMap[i] != 0) { - ic += ucb[TableFillFlags[i] == 0 ? 
1 : 0]; - } - } - } - - return (ic); - } - - private: - static const int InterpPoints = 2; ///< The maximal number of points the - ///< interpolation is based on. - ///< - double WFLen2; ///< Window function's Len2 parameter. - ///< - double WFFreq; ///< Window function's Freq parameter. - ///< - double WFAlpha; ///< Window function's Alpha parameter. - ///< - int FracCount; ///< The required number of fractional delay filters. - ///< - int Order; ///< The order of the interpolation polynomial. - ///< - int Alignment; ///< The required filter table alignment. - ///< - int SrcFilterLen; ///< Length of the "source" filters. This is always an - ///< even value. - ///< - int FilterLen; ///< Specifies the number of samples (taps) each fractional - ///< delay filter has. This is always an even value, adjusted - ///< by the FltLenAlign. - ///< - int FilterSize; ///< The size of a single filter element, equals - ///< FilterLen * ElementSize. - ///< - bool IsInitRequired; ///< "True" if SrcTable filter table initialization - ///< is required. This value is available only after the - ///< call to the init() function. - ///< - CBuffer Table; ///< Interpolation table, size equals to - ///< ReqFracCount * FilterLen * ElementSize. - ///< - CBuffer - TableFillFlags; ///< Contains ReqFracCount + 1 - ///< elements. Bit 0 of every element is 1 if Table - ///< already contains the filter from SrcTable filtered - ///< by ExtFilter. Bit 1 of every element means higher - ///< order coefficients were filled for the filter. - ///< - CFltBuffer ExtFilter; ///< External filter that should be applied to every - ///< fractional delay filter. Can be empty. Half of - ///< this filter's capacity is used as latency (group - ///< delay) value of the filter. - ///< - CBuffer SrcTable; ///< Source table of delay filters, contains - ///< ReqFracCount + 1 elements. This table is used - ///< to fill the Table with the actual filters, - ///< filtered by an external filter. 
- ///< - bool IsSrcTableBuilt; ///< "True" if the SrcTable was built already. This - ///< variable is set to "false" in the init() function. - ///< - - /** - * Function builds source table used in the createFilter() function. - */ - - void buildSrcTable() { - IsSrcTableBuilt = true; - IsInitRequired = false; - - CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha); - - const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1; - const int BufOffs = InterpPoints / 2 - 1; - const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs; - - CBuffer Buf(BufLen); - memset(Buf, 0, (BufCenter - p.fl2) * sizeof(double)); - int i = BufLen - BufCenter - p.fl2 - 1; - memset(&Buf[BufLen - i], 0, i * sizeof(double)); - - p.generateLPF(&Buf[BufCenter - p.fl2], FracCount); - - SrcTable.alloc((FracCount + 1) * SrcFilterLen); - TableFillFlags.alloc(FracCount + 1); - int j; - double* op0 = SrcTable; - - for (i = FracCount; i >= 0; i--) { - TableFillFlags[i] = 0; - double* p = Buf + BufOffs + i; - - for (j = 0; j < SrcFilterLen; j++) { - op0[0] = p[0]; - op0++; - p += FracCount; - } - } - - Table.alloc((FracCount + 1) * FilterSize, Alignment); - } - - /** - * Function creates the specified filter in the Table by copying it from - * the SrcTable and filtering by ExtFilter. Function does nothing if - * filter was already created. - * - * @param k Filter index to create, in the range 0 to FracCount, - * inclusive. 
- */ - - void createFilter(const int k) { - if (TableFillFlags[k] != 0) { - return; - } - - TableFillFlags[k] |= 1; - const int ExtFilterLatency = ExtFilter.getCapacity() / 2; - const int ResLatency = ExtFilterLatency + SrcFilterLen / 2; - int ResLen = SrcFilterLen; - - if (ExtFilter.getCapacity() > 0) { - ResLen += ExtFilter.getCapacity() - 1; - } - - const int ResOffs = FilterLen / 2 - ResLatency; - fptype* op = &Table[k * FilterSize]; - int i; - - for (i = 0; i < ResOffs; i++) { - op[i] = 0.0; - } - - for (i = ResOffs + ResLen; i < FilterLen; i++) { - op[i] = 0.0; - } - - op += ResOffs; - const double* const srcflt = &SrcTable[k * SrcFilterLen]; - - if (ExtFilter.getCapacity() == 0) { - for (i = 0; i < ResLen; i++) { - op[i] = (fptype)srcflt[i]; - } - - return; - } - - // Perform convolution of extflt and srcflt. - - const double* const extflt = &ExtFilter[0]; - int j; - - for (j = 0; j < ResLen; j++) { - int k = 0; - int l = j - ExtFilter.getCapacity() + 1; - int r = l + ExtFilter.getCapacity(); - - if (l < 0) { - k -= l; - l = 0; - } - - if (r > SrcFilterLen) { - r = SrcFilterLen; - } - - const double* const extfltb = extflt + k; - const double* const srcfltb = srcflt + l; - double s = 0.0; - l = r - l; - - for (i = 0; i < l; i++) { - s += extfltb[i] * srcfltb[i]; - } - - op[j] = (fptype)s; - } - } -}; - -/** - * @brief Thread pool for multi-threaded image resizing operation. - * - * This base class is used to organize a multi-threaded image resizing - * operation. The thread pool should consist of threads that initially wait - * for a signal. Upon receiving a signal (via the startAllWorkloads() - * function) each previously added thread should execute its workload's - * process() function once, and return to the wait signal state again. The - * thread pool should be also able to efficiently wait for all workloads to - * finish via the waitAllWorkloadsToFinish() function. - * - * The image resizing algorithm makes calls to functions of this class. 
- */ - -class CImageResizerThreadPool { - public: - CImageResizerThreadPool() {} - - virtual ~CImageResizerThreadPool() {} - - /** - * @brief Thread pool's workload object class. - * - * This class should be used as a base class for objects that perform the - * actual work spread over several threads. - */ - - class CWorkload { - public: - virtual ~CWorkload() {} - - /** - * Function that gets called from the thread when thread pool's - * startAllWorkloads() function is called. - */ - - virtual void process() = 0; - }; - - /** - * @return The suggested number of workloads (and their associated - * threads) to add. The minimal value this function can return is 1. The - * usual value may depend on the number of physical and virtual cores - * present in the system, and on other considerations. - */ - - virtual int getSuggestedWorkloadCount() const { return (1); } - - /** - * Function adds a new workload (and possibly thread) to the thread pool. - * The caller decides how many parallel workloads (and threads) it - * requires, but this number will not exceed the value returned by the - * getSuggestedWorkloadCount() function. It is implementation-specific how - * many workloads to associate with a single thread. But for efficiency - * reasons each workload should be associated with its own thread. - * - * Note that the same set of workload objects will be processed each time - * the startAllWorkloads() function is called. This means that workload - * objects are added only once. The caller changes the state of the - * workload objects and then calls the startAllWorkloads() function to - * process them. - * - * @param Workload Workload object whose process() function will be called - * from within the thread when the startAllWorkloads() function is called. - */ - - virtual void addWorkload(CWorkload* const Workload) {} - - /** - * Function starts all workloads associated with threads previously added - * via the addWorkload() function. 
It is assumed that this function - * performs the necessary "memory barrier" (or "cache sync") kind of - * operation so that all threads catch up the prior changes made to the - * workload objects during their wait state. - */ - - virtual void startAllWorkloads() {} - - /** - * Function waits for all workloads to finish. - */ - - virtual void waitAllWorkloadsToFinish() {} - - /** - * Function removes all workloads previously added via the addWorkload() - * function. This function gets called only after the - * waitAllWorkloadsToFinish() function call. - */ - - virtual void removeAllWorkloads() {} -}; - -/** - * @brief Resizing algorithm parameters structure. - * - * This structure holds all selectable parameters used by the resizing - * algorithm at various stages, for both downsizing and upsizing. There are no - * other parameters exist that can optimize the performance of the resizing - * algorithm. Filter length parameters can take fractional values. - * - * Beside quality, these parameters (except Alpha parameters) directly affect - * the computative cost of the resizing algorithm. It is possible to trade - * the visual quality for computative cost. - * - * Anti-alias filtering during downsizing can be defined as a considerable - * reduction of contrast of smallest features of an image. Unfortunately, such - * de-contrasting partially affects features of all sizes thus producing a - * non-linearity of frequency response. All pre-defined parameter sets are - * described by 3 values separated by slashes. The first value is the - * de-contrasting factor of small features (which are being removed) while - * the second value is the de-contrasting factor of large features (which - * should remain intact), with value of 1 equating to "no contrast change". - * The third value is the optimization score (see below), with value of 0 - * equating to the "perfect" linearity of frequency response. 
- * - * The pre-defined parameter sets offered by this library were auto-optimized - * for the given LPFltBaseLen, IntFltLen and CorrFltAlpha values. The - * optimization goal was to minimize the score: the sum of squares of the - * difference between original and processed images (which was not actually - * resized, k=1). The original image was a 0.5 megapixel uniformly-distributed - * white-noise image with pixel intensities in the 0-1 range. Such goal - * converges very well and produces filtering system with the flattest - * frequency response possible for the given constraints. With this goal, - * increasing the LPFltBaseLen value reduces the general amount of aliasing - * artifacts. - */ - -struct CImageResizerParams { - double CorrFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the correction filter. The - ///< "usable" values are in the narrow range 1.0 to 1.5. - ///< - double CorrFltLen; ///< Correction filter's length in samples (taps). The - ///< "usable" range is narrow, 5.5 to 8, as to minimize - ///< the "overcorrection" which is mathematically precise, - ///< but visually unacceptable. - ///< - double IntFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the interpolation low-pass filter. - ///< The "usable" values are in the range 1.5 to 2.5. - ///< - double IntFltCutoff; ///< Interpolation low-pass filter's cutoff frequency - ///< (normalized, [0; 1]). The "usable" range is 0.6 to - ///< 0.8. - ///< - double IntFltLen; ///< Interpolation low-pass filter's length in samples - ///< (taps). The length value should be at least 18 or - ///< otherwise a "dark grid" artifact will be introduced if - ///< a further sharpening is applied. IntFltLen together - ///< with other IntFlt parameters should be tuned in a way - ///< that produces the flattest frequency response in 0-0.5 - ///< normalized frequency range (this range is due to 2X - ///< upsampling). 
- ///< - double LPFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the low-pass filter. The "usable" - ///< values are in the range 1.5 to 6.5. - ///< - double LPFltBaseLen; ///< Base length of the low-pass (aka anti-aliasing - ///< or reconstruction) filter, in samples (taps), - ///< further adjusted by the actual cutoff frequency, - ///< upsampling and downsampling factors. The "usable" - ///< range is between 6 and 9. - ///< - double LPFltCutoffMult; ///< Low-pass filter's cutoff frequency - ///< multiplier. This value can be both below and - ///< above 1.0 as low-pass filters are inserted on - ///< downsampling and upsampling steps and always - ///< have corner frequency equal to or below 0.5pi. - ///< This multiplier shifts low-pass filter's corner - ///< frequency towards lower (if below 1.0) or higher - ///< (if above 1.0) frequencies. This multiplier can - ///< be way below 1.0 since any additional - ///< high-frequency damping will be partially - ///< corrected by the correction filter. The "usable" - ///< range is 0.3 to 1.0. - ///< - - CImageResizerParams() - : HBFltAlpha(1.75395), HBFltCutoff(0.40356), HBFltLen(22.00000) {} - - double HBFltAlpha; ///< Half-band filter's Alpha. Assigned internally. - ///< - double HBFltCutoff; ///< Half-band filter's cutoff point [0; 1]. Assigned - ///< internally. - ///< - double HBFltLen; ///< Length of the half-band low-pass filter. Assigned - ///< internally. Internally used to perform 2X or higher - ///< downsampling. These filter parameters should be treated - ///< as "technical" and do not require adjustment as they - ///< were tuned to suit all combinations of other - ///< parameters. This half-band filter provides a wide - ///< transition band (for minimal ringing artifacts) and a - ///< high stop-band attenuation (for minimal aliasing). - ///< -}; - -/** - * @brief The default set of resizing algorithm parameters - * (10.01/1.029/0.019169). 
- * - * This is the default set of resizing parameters that was designed to deliver - * a sharp image while still providing a low amount of ringing artifacts, and - * having a reasonable computational cost. - */ - -struct CImageResizerParamsDef : public CImageResizerParams { - CImageResizerParamsDef() { - CorrFltAlpha = 1.0; // 10.01/1.88/1.029(522.43)/0.019169:258648,446808 - CorrFltLen = 6.30770; - IntFltAlpha = 2.27825; - IntFltCutoff = 0.75493; - IntFltLen = 18.0; - LPFltAlpha = 3.40127; - LPFltBaseLen = 7.78; - LPFltCutoffMult = 0.78797; - } -}; - -/** - * @brief Set of resizing algorithm parameters for ultra-low-ringing - * performance (7.69/1.069/0.000245). - * - * This set of resizing algorithm parameters offers the lowest amount of - * ringing this library is capable of providing while still offering a decent - * quality. Low ringing is attained at the expense of higher aliasing - * artifacts and a slightly reduced contrast. - */ - -struct CImageResizerParamsULR : public CImageResizerParams { - CImageResizerParamsULR() { - CorrFltAlpha = 1.0; // 7.69/1.97/1.069(31445.45)/0.000245:258627,436845 - CorrFltLen = 5.83280; - IntFltAlpha = 2.11453; - IntFltCutoff = 0.73986; - IntFltLen = 18.0; - LPFltAlpha = 1.73455; - LPFltBaseLen = 6.40; - LPFltCutoffMult = 0.61314; - } -}; - -/** - * @brief Set of resizing algorithm parameters for low-ringing performance - * (7.86/1.065/0.000106). - * - * This set of resizing algorithm parameters offers a very low-ringing - * performance at the expense of higher aliasing artifacts and a slightly - * reduced contrast. 
- */ - -struct CImageResizerParamsLR : public CImageResizerParams { - CImageResizerParamsLR() { - CorrFltAlpha = 1.0; // 7.86/1.96/1.065(73865.02)/0.000106:258636,437381 - CorrFltLen = 5.87671; - IntFltAlpha = 2.25322; - IntFltCutoff = 0.74090; - IntFltLen = 18.0; - LPFltAlpha = 1.79306; - LPFltBaseLen = 7.00; - LPFltCutoffMult = 0.68881; - } -}; - -/** - * @brief Set of resizing algorithm parameters for lower-ringing performance - * (8.86/1.046/0.010168). - * - * This set of resizing algorithm parameters offers a lower-ringing - * performance in comparison to the default setting, at the expense of higher - * aliasing artifacts and a slightly reduced contrast. - */ - -struct CImageResizerParamsLow : public CImageResizerParams { - CImageResizerParamsLow() { - CorrFltAlpha = 1.0; // 8.86/1.92/1.046(871.54)/0.010168:258647,442252 - CorrFltLen = 6.09757; - IntFltAlpha = 2.36704; - IntFltCutoff = 0.74674; - IntFltLen = 18.0; - LPFltAlpha = 2.19427; - LPFltBaseLen = 7.66; - LPFltCutoffMult = 0.75380; - } -}; - -/** - * @brief Set of resizing algorithm parameters for low-aliasing - * resizing (11.81/1.012/0.038379). - * - * This set of resizing algorithm parameters offers a considerable - * anti-aliasing performance with a good frequency response linearity (and - * contrast). This is an intermediate setting between the default and Ultra - * parameters. - */ - -struct CImageResizerParamsHigh : public CImageResizerParams { - CImageResizerParamsHigh() { - CorrFltAlpha = 1.0; // 11.81/1.83/1.012(307.84)/0.038379:258660,452719 - CorrFltLen = 6.80909; - IntFltAlpha = 2.44917; - IntFltCutoff = 0.75856; - IntFltLen = 18.0; - LPFltAlpha = 4.39527; - LPFltBaseLen = 8.18; - LPFltCutoffMult = 0.79172; - } -}; - -/** - * @brief Set of resizing algorithm parameters for ultra low-aliasing - * resizing (13.65/1.001/0.000483). 
- * - * This set of resizing algorithm parameters offers a very considerable - * anti-aliasing performance with a good frequency response linearity (and - * contrast). This set of parameters is computationally expensive and may - * produce ringing artifacts on sharp features. - */ - -struct CImageResizerParamsUltra : public CImageResizerParams { - CImageResizerParamsUltra() { - CorrFltAlpha = 1.0; // 13.65/1.79/1.001(28288.41)/0.000483:258658,457974 - CorrFltLen = 7.48060; - IntFltAlpha = 1.93750; - IntFltCutoff = 0.75462; - IntFltLen = 18.0; - LPFltAlpha = 5.55209; - LPFltBaseLen = 8.34; - LPFltCutoffMult = 0.78002; - } -}; - -/** - * @brief Image resizing variables class. - * - * This is an utility "catch all" class that defines various variables used - * during image resizing. Several variables that are explicitly initialized in - * this class' constructor are also used as additional "input" variables to - * the image resizing function. These variables will not be changed by the - * avir::CImageResizer<>::resizeImage() function. - */ - -class CImageResizerVars { - public: - int ElCount; ///< The number of "fptype" elements used to store 1 pixel. - ///< - int ElCountIO; ///< The number of source and destination image's elements - ///< used to store 1 pixel. - ///< - int fppack; ///< The number of atomic types stored in a single "fptype" - ///< element. - ///< - int fpalign; ///< Suggested alignment size in bytes. This is not a - ///< required alignment, because image resizing algorithm cannot - ///< be made to have a strictly aligned data access in all cases - ///< (e.g. de-interleaved interpolation cannot perform aligned - ///< accesses). - ///< - int elalign; ///< Length alignment of arrays of elements. This applies to - ///< filters and intermediate buffers: this constant forces - ///< filters and scanlines to have a length which is a multiple - ///< of this value, for more efficient SIMD implementation. 
- ///< - int packmode; ///< 0 if interleaved packing, 1 if de-interleaved. - ///< - int BufLen[2]; ///< Intermediate buffers' lengths in "fptype" elements. - int BufOffs[2]; ///< Offsets into the intermediate buffers, used to - ///< provide prefix elements required during processing so - ///< that no "out of range" access happens. This offset is a - ///< multiple of ElCount if pixels are stored in interleaved - ///< form. - ///< - double k; ///< Resizing step coefficient, updated to reflect the actually - ///< used coefficient during resizing. - ///< - double o; ///< Starting pixel offset inside the source image, updated to - ///< reflect the actually used offset during resizing. - ///< - int ResizeStep; ///< Index of the resizing step in the latest filtering - ///< steps array. - ///< - double InGammaMult; ///< Input gamma multiplier, used to convert input - ///< data to 0 to 1 range. 0.0 if no gamma is in use. - ///< - double OutGammaMult; ///< Output gamma multiplier, used to convert data to - ///< 0 to 255/65535 range. 0.0 if no gamma is in use. - ///< - - double ox; ///< Start X pixel offset within source image (can be - ///< negative). Positive offset moves image to the left. - ///< - double oy; ///< Start Y pixel offset within source image (can be - ///< negative). Positive offset moves image to the top. - ///< - CImageResizerThreadPool* - ThreadPool; ///< Thread pool to be used by the - ///< image resizing function. Set to NULL to use - ///< single-threaded processing. - ///< - bool UseSRGBGamma; ///< Perform sRGB gamma linearization (correction). - ///< - int BuildMode; ///< The build mode to use, for debugging purposes. Set to - ///< -1 to select a minimal-complexity mode automatically. All - ///< build modes deliver similar results with minor - ///< deviations. - ///< - int RndSeed; ///< Random seed parameter. This parameter may be incremented - ///< after each random generator initialization. 
The use of this - ///< variable depends on the ditherer implementation. - ///< - - CImageResizerVars() - : ox(0.0), - oy(0.0), - ThreadPool(NULL), - UseSRGBGamma(false), - BuildMode(-1), - RndSeed(0) {} -}; - -/** - * @brief Image resizer's filtering step class. - * - * Class defines data to perform a single filtering step over a whole - * horizontal or vertical scanline. Resizing consists of 1 or more steps that - * may be performed before the actual resizing takes place. Filtering may also - * follow a resizing step. Each step must ensure that scanline data contains - * enough pixels to perform the next step (which may be resizing) without - * exceeding scanline's bounds. - * - * A derived class must implement several "const" and "static" functions that - * are used to perform the actual filtering in interleaved or de-interleaved - * mode. - * - * @tparam fptype Floating point type to use for storing pixel elements. SIMD - * types can be used: in this case each element may hold a whole pixel. - * @tparam fptypeatom The atomic type the "fptype" consists of. - */ - -template -class CImageResizerFilterStep { - public: - bool IsUpsample; ///< "True" if this step is an upsampling step, "false" - ///< if downsampling step. Should be set to "false" if - ///< ResampleFactor equals 0. - ///< - int ResampleFactor; ///< Resample factor (>=1). If 0, this is a resizing - ///< step. This value should be >1 if IsUpsample equals - ///< "true". - ///< - CBuffer Flt; ///< Filter to use at this step. - ///< - CFltBuffer FltOrig; ///< Originally-designed filter. This buffer may not - ///< be assigned. Assigned by filters that precede the - ///< resizing step if such filter is planned to be - ///< embedded into the interpolation filter as "external" - ///< filter. If IsUpsample=true and this filter buffer is - ///< not empty, the upsampling step will not itself apply - ///< any filtering over upsampled input scanline. 
- ///< - double DCGain; ///< DC gain which was applied to the filter. Not defined - ///< if ResampleFactor = 0. - ///< - int FltLatency; ///< Filter's latency (group delay, shift) in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int InLen; ///< Input scanline's length in pixels. - ///< - int InBuf; ///< Input buffer index, 0 or 1. - ///< - int InPrefix; ///< Required input prefix pixels. These prefix pixels will - ///< be filled with source scanline's first pixel value. If - ///< IsUpsample is "true", this is the additional number of - ///< times the first pixel will be filtered before processing - ///< scanline, this number is also reflected in the OutPrefix. - ///< - int InSuffix; ///< Required input suffix pixels. These suffix pixels will - ///< be filled with source scanline's last pixel value. If - ///< IsUpsample is "true", this is the additional number of - ///< times the last pixel will be filtered before processing - ///< scanline, this number is also reflected in the OutSuffix. - ///< - int InElIncr; ///< Pixel element increment within the input buffer, used - ///< during de-interleaved processing: in this case each - ///< image's channel is stored independently, InElIncr elements - ///< apart. - ///< - int OutLen; ///< Length of the resulting scanline. - ///< - int OutBuf; ///< Output buffer index. 0 or 1; 2 for the last step. - ///< - int OutPrefix; ///< Required output prefix pixels. These prefix pixels - ///< will not be pre-filled with any values. Value is valid - ///< only if IsUpsample equals "true". - ///< - int OutSuffix; ///< Required input suffix pixels. These suffix pixels will - ///< not be pre-filled with any values. Value is valid only if - ///< IsUpsample equals "true". - ///< - int OutElIncr; ///< Pixel element increment within the output buffer, used - ///< during de-interleaved processing. Equals to the - ///< InBufElIncr of the next step. 
- ///< - CBuffer PrefixDC; ///< DC component fluctuations added at the - ///< start of the resulting scanline, used when - ///< IsUpsample equals "true". - ///< - CBuffer SuffixDC; ///< DC component fluctuations added at the - ///< end of the resulting scanline, used when - ///< IsUpsample equals "true". - ///< - int EdgePixelCount; ///< The number of edge pixels added. Affects the - ///< initial position within the input scanline, used to - ///< produce edge pixels. This variable is used and - ///< should be defined when IsUpsample=false and - ///< ResampleFactor>0. When assigning this variable it is - ///< also necessary to update InPrefix, OutLen and Vars.o - ///< variables. - ///< - static const int EdgePixelCountDef = - 3; ///< The default number of pixels - ///< additionally produced at scanline edges during filtering. This is - ///< required to reduce edge artifacts. - ///< - - /** - * @brief Resizing position structure. - * - * Structure holds resizing position and pointer to fractional delay - * filter. - */ - - struct CResizePos { - int SrcPosInt; ///< Source scanline position. - ///< - int fti; ///< Fractional delay filter index. - ///< - const fptype* ftp; ///< Fractional delay filter pointer. - ///< - fptypeatom x; ///< Interpolation coefficient between delay filters. - ///< - int SrcOffs; ///< Source scanline offset. - ///< - }; - - /** - * @brief Resizing positions buffer class. - * - * This class combines buffer together with variables that define resizing - * stepping. - */ - - class CRPosBuf : public CBuffer { - public: - double k; ///< Resizing step. - ///< - double o; ///< Resizing offset. - ///< - int FracCount; ///< The number of fractional delay filters in a filter - ///< bank used together with this buffer. - ///< - }; - - /** - * @brief Resizing positions buffer array class. - * - * This class combines structure array of the CRPosBuf class objects with - * the function that locates or creates buffer with the required resizing - * stepping. 
- */ - - class CRPosBufArray : public CStructArray { - public: - using CStructArray::add; - using CStructArray::getItemCount; - - /** - * Function returns the resizing positions buffer with the required - * stepping. If no such buffer exists, it is created. - * - * @param k Resizing step. - * @param o Resizing offset. - * @param FracCount The number of fractional delay filters in a filter - * bank used together with this buffer. - * @return Reference to the CRPosBuf object. - */ - - CRPosBuf& getRPosBuf(const double k, const double o, const int FracCount) { - int i; - - for (i = 0; i < getItemCount(); i++) { - CRPosBuf& Buf = (*this)[i]; - - if (Buf.k == k && Buf.o == o && Buf.FracCount == FracCount) { - return (Buf); - } - } - - CRPosBuf& NewBuf = add(); - NewBuf.k = k; - NewBuf.o = o; - NewBuf.FracCount = FracCount; - - return (NewBuf); - } - }; - - CRPosBuf* RPosBuf; ///< Resizing positions buffer. Used when - ///< ResampleFactor equals 0 (resizing step). - ///< - CDSPFracFilterBankLin* FltBank; ///< Filter bank in use by *this - ///< resizing step. - ///< -}; - -/** - * @brief Interleaved filtering steps implementation class. - * - * This class implements scanline filtering functions in interleaved mode. - * This means that each pixel is processed independently, not in groups. - * - * @tparam fptype Floating point type to use for storing pixel elements. SIMD - * types can be used: in this case each element may hold a whole pixel. - * @tparam fptypeatom The atomic type the "fptype" consists of. 
- */ - -template -class CImageResizerFilterStepINL - : public CImageResizerFilterStep { - public: - using CImageResizerFilterStep::IsUpsample; - using CImageResizerFilterStep::ResampleFactor; - using CImageResizerFilterStep::Flt; - using CImageResizerFilterStep::FltOrig; - using CImageResizerFilterStep::FltLatency; - using CImageResizerFilterStep::Vars; - using CImageResizerFilterStep::InLen; - using CImageResizerFilterStep::InPrefix; - using CImageResizerFilterStep::InSuffix; - using CImageResizerFilterStep::OutLen; - using CImageResizerFilterStep::OutPrefix; - using CImageResizerFilterStep::OutSuffix; - using CImageResizerFilterStep::PrefixDC; - using CImageResizerFilterStep::SuffixDC; - using CImageResizerFilterStep::RPosBuf; - using CImageResizerFilterStep::FltBank; - using CImageResizerFilterStep::EdgePixelCount; - - /** - * Function performs "packing" of a scanline and type conversion. - * Scanline, depending on the "fptype" can be potentially stored as a - * packed SIMD values having a certain atomic type. If required, the sRGB - * gamma correction is applied. - * - * @param ip Input scanline. - * @param op0 Output scanline. - * @param l0 The number of pixels to "pack". 
- */ - - template - void packScanline(const Tin* ip, fptype* const op0, const int l0) const { - const int ElCount = Vars->ElCount; - const int ElCountIO = Vars->ElCountIO; - fptype* op = op0; - int l = l0; - - if (!Vars->UseSRGBGamma) { - if (ElCountIO == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - op += ElCount; - ip++; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - v[1] = (fptypeatom)ip[1]; - v[2] = (fptypeatom)ip[2]; - v[3] = (fptypeatom)ip[3]; - op += ElCount; - ip += 4; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - v[1] = (fptypeatom)ip[1]; - v[2] = (fptypeatom)ip[2]; - op += ElCount; - ip += 3; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - v[1] = (fptypeatom)ip[1]; - op += ElCount; - ip += 2; - l--; - } - } - } else { - const fptypeatom gm = (fptypeatom)Vars->InGammaMult; - - if (ElCountIO == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - op += ElCount; - ip++; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm); - v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm); - v[3] = convertSRGB2Lin((fptypeatom)ip[3] * gm); - op += ElCount; - ip += 4; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm); - v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm); - op += ElCount; - ip += 3; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - v[1] = 
convertSRGB2Lin((fptypeatom)ip[1] * gm); - op += ElCount; - ip += 2; - l--; - } - } - } - - const int ZeroCount = ElCount * Vars->fppack - ElCountIO; - op = op0; - l = l0; - - if (ZeroCount == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op + ElCountIO; - v[0] = (fptypeatom)0; - op += ElCount; - l--; - } - } else if (ZeroCount == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op + ElCountIO; - v[0] = (fptypeatom)0; - v[1] = (fptypeatom)0; - op += ElCount; - l--; - } - } else if (ZeroCount == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op + ElCountIO; - v[0] = (fptypeatom)0; - v[1] = (fptypeatom)0; - v[2] = (fptypeatom)0; - op += ElCount; - l--; - } - } - } - - /** - * Function applies Linear to sRGB gamma correction to the specified - * scanline. - * - * @param p Scanline. - * @param l The number of pixels to de-linearize. - * @param Vars0 Image resizing-related variables. - */ - - static void applySRGBGamma(fptype* p, int l, const CImageResizerVars& Vars0) { - const int ElCount = Vars0.ElCount; - const int ElCountIO = Vars0.ElCountIO; - const fptypeatom gm = (fptypeatom)Vars0.OutGammaMult; - - if (ElCountIO == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - p += ElCount; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - v[1] = convertLin2SRGB(v[1]) * gm; - v[2] = convertLin2SRGB(v[2]) * gm; - v[3] = convertLin2SRGB(v[3]) * gm; - p += ElCount; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - v[1] = convertLin2SRGB(v[1]) * gm; - v[2] = convertLin2SRGB(v[2]) * gm; - p += ElCount; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - v[1] = convertLin2SRGB(v[1]) * gm; - p += ElCount; - l--; - } - } - } - - /** - * Function converts vertical scanline to 
horizontal scanline. This - * function is called by the image resizer when image is resized - * vertically. This means that the vertical scanline is stored in the - * same format produced by the packScanline() and maintained by other - * filtering functions. - * - * @param ip Input vertical scanline. - * @param op Output buffer (temporary buffer used during resizing). - * @param SrcLen The number of pixels in the input scanline, also used to - * calculate input buffer increment. - * @param SrcIncr Input buffer increment to the next vertical pixel. - */ - - void convertVtoH(const fptype* ip, fptype* op, const int SrcLen, - const int SrcIncr) const { - const int ElCount = Vars->ElCount; - int j; - - if (ElCount == 1) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - ip += SrcIncr; - op++; - } - } else if (ElCount == 4) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - op[3] = ip[3]; - ip += SrcIncr; - op += 4; - } - } else if (ElCount == 3) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - ip += SrcIncr; - op += 3; - } - } else if (ElCount == 2) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - op[1] = ip[1]; - ip += SrcIncr; - op += 2; - } - } - } - - /** - * Function performs "unpacking" of a scanline and type conversion - * (truncation is used when floating point is converted to integer). - * Scanline, depending on the "fptype" can be potentially stored as a - * packed SIMD values having a certain atomic type. The unpacking function - * assumes that scanline is stored in the style produced by the - * packScanline() function. - * - * @param ip Input scanline. - * @param op Output scanline. - * @param l The number of pixels to "unpack". - * @param Vars0 Image resizing-related variables. 
- */ - - template - static void unpackScanline(const fptype* ip, Tout* op, int l, - const CImageResizerVars& Vars0) { - const int ElCount = Vars0.ElCount; - const int ElCountIO = Vars0.ElCountIO; - - if (ElCountIO == 1) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - ip += ElCount; - op++; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - op[1] = (Tout)v[1]; - op[2] = (Tout)v[2]; - op[3] = (Tout)v[3]; - ip += ElCount; - op += 4; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - op[1] = (Tout)v[1]; - op[2] = (Tout)v[2]; - ip += ElCount; - op += 3; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - op[1] = (Tout)v[1]; - ip += ElCount; - op += 2; - l--; - } - } - } - - /** - * Function prepares input scanline buffer for *this filtering step. - * Left- and right-most pixels are replicated to make sure no buffer - * overrun happens. Such approach also allows to bypass any pointer - * range checks. - * - * @param Src Source buffer. - */ - - void prepareInBuf(fptype* Src) const { - if (IsUpsample || InPrefix + InSuffix == 0) { - return; - } - - const int ElCount = Vars->ElCount; - replicateArray(Src, ElCount, Src - ElCount, InPrefix, -ElCount); - - Src += (InLen - 1) * ElCount; - replicateArray(Src, ElCount, Src + ElCount, InSuffix, ElCount); - } - - /** - * Function peforms scanline upsampling with filtering. - * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. 
- */ - - void doUpsample(const fptype* const Src, fptype* const Dst) const { - const int ElCount = Vars->ElCount; - fptype* op0 = &Dst[-OutPrefix * ElCount]; - memset(&op0->value, 0, (OutPrefix + OutLen + OutSuffix) * ElCount * sizeof(op0->value)); - - const fptype* ip = Src; - const int opstep = ElCount * ResampleFactor; - int l; - - if (FltOrig.getCapacity() > 0) { - // Do not perform filtering, only upsample. - - op0 += (OutPrefix % ResampleFactor) * ElCount; - l = OutPrefix / ResampleFactor; - - if (ElCount == 1) { - while (l > 0) { - op0[0] = ip[0]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0 += opstep; - l--; - } - } else if (ElCount == 4) { - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0[3] = ip[3]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0[3] = ip[3]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0[3] = ip[3]; - op0 += opstep; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0 += opstep; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0 += 
opstep; - l--; - } - } - - return; - } - - const fptype* const f = Flt; - const int flen = Flt.getCapacity(); - fptype* op; - int i; - - if (ElCount == 1) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[i] += f[i] * ip[0]; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[i] += f[i] * ip[0]; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[i] += f[i] * ip[0]; - } - - op0 += opstep; - l--; - } - } else if (ElCount == 4) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op[3] += f[i] * ip[3]; - op += 4; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op[3] += f[i] * ip[3]; - op += 4; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op[3] += f[i] * ip[3]; - op += 4; - } - - op0 += opstep; - l--; - } - } else if (ElCount == 3) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op += 3; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op += 3; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op += 3; - } - - op0 += opstep; - l--; - } - } else if 
(ElCount == 2) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op += 2; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op += 2; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op += 2; - } - - op0 += opstep; - l--; - } - } - - op = op0; - const fptype* dc = SuffixDC; - l = SuffixDC.getCapacity(); - - if (ElCount == 1) { - for (i = 0; i < l; i++) { - op[i] += ip[0] * dc[i]; - } - } else if (ElCount == 4) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - op[3] += ip[3] * dc[0]; - dc++; - op += 4; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - dc++; - op += 3; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - dc++; - op += 2; - l--; - } - } - - ip = Src; - op = Dst - InPrefix * opstep; - dc = PrefixDC; - l = PrefixDC.getCapacity(); - - if (ElCount == 1) { - for (i = 0; i < l; i++) { - op[i] += ip[0] * dc[i]; - } - } else if (ElCount == 4) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - op[3] += ip[3] * dc[0]; - dc++; - op += 4; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - dc++; - op += 3; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - dc++; - op += 2; - l--; - } - } - } - - /** - * Function peforms scanline filtering with optional downsampling. - * Function makes use of the symmetry of the filter. 
- * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. - * @param DstIncr Destination scanline buffer increment, used for - * horizontal or vertical scanline stepping. - */ - - void doFilter(const fptype* const Src, fptype* Dst, const int DstIncr) const { - const int ElCount = Vars->ElCount; - const fptype* const f = &Flt[FltLatency]; - const int flen = FltLatency + 1; - const int ipstep = ElCount * ResampleFactor; - const fptype* ip = Src - EdgePixelCount * ipstep; - const fptype* ip1; - const fptype* ip2; - int l = OutLen; - int i; - - if (ElCount == 1) { - while (l > 0) { - fptype s = f[0] * ip[0]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1++; - ip2--; - s += f[i] * (ip1[0] + ip2[0]); - } - - Dst[0] = s; - Dst += DstIncr; - ip += ipstep; - l--; - } - } else if (ElCount == 4) { - while (l > 0) { - fptype s1 = f[0] * ip[0]; - fptype s2 = f[0] * ip[1]; - fptype s3 = f[0] * ip[2]; - fptype s4 = f[0] * ip[3]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1 += 4; - ip2 -= 4; - s1 += f[i] * (ip1[0] + ip2[0]); - s2 += f[i] * (ip1[1] + ip2[1]); - s3 += f[i] * (ip1[2] + ip2[2]); - s4 += f[i] * (ip1[3] + ip2[3]); - } - - Dst[0] = s1; - Dst[1] = s2; - Dst[2] = s3; - Dst[3] = s4; - Dst += DstIncr; - ip += ipstep; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - fptype s1 = f[0] * ip[0]; - fptype s2 = f[0] * ip[1]; - fptype s3 = f[0] * ip[2]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1 += 3; - ip2 -= 3; - s1 += f[i] * (ip1[0] + ip2[0]); - s2 += f[i] * (ip1[1] + ip2[1]); - s3 += f[i] * (ip1[2] + ip2[2]); - } - - Dst[0] = s1; - Dst[1] = s2; - Dst[2] = s3; - Dst += DstIncr; - ip += ipstep; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - fptype s1 = f[0] * ip[0]; - fptype s2 = f[0] * ip[1]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1 += 2; - ip2 -= 2; - s1 += f[i] * (ip1[0] 
+ ip2[0]); - s2 += f[i] * (ip1[1] + ip2[1]); - } - - Dst[0] = s1; - Dst[1] = s2; - Dst += DstIncr; - ip += ipstep; - l--; - } - } - } - - /** - * Function performs resizing of a single scanline. This function does - * not "know" about the length of the source scanline buffer. This buffer - * should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is - * always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 ) - * does not exceed source scanline's buffer length. SrcLine's increment is - * assumed to be equal to ElCount. - * - * @param SrcLine Source scanline buffer. - * @param DstLine Destination (resized) scanline buffer. - * @param DstLineIncr Destination scanline position increment, used for - * horizontal or vertical scanline stepping. - * @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be - * aligned by fpclass :: fpalign. - */ - - void doResize(const fptype* SrcLine, fptype* DstLine, const int DstLineIncr, - fptype* const) const { - const int IntFltLen = FltBank->getFilterLen(); - const int ElCount = Vars->ElCount; - const typename CImageResizerFilterStep::CResizePos* - rpos = &(*RPosBuf)[0]; - - const typename CImageResizerFilterStep< - fptype, fptypeatom>::CResizePos* const rpose = rpos + OutLen; - -#define AVIR_RESIZE_PART1 \ - while (rpos < rpose) { \ - const fptype x = (fptype)rpos->x; \ - const fptype* const ftp = rpos->ftp; \ - const fptype* const ftp2 = ftp + IntFltLen; \ - const fptype* Src = SrcLine + rpos->SrcOffs; \ - int i; - -#define AVIR_RESIZE_PART1nx \ - while (rpos < rpose) { \ - const fptype* const ftp = rpos->ftp; \ - const fptype* Src = SrcLine + rpos->SrcOffs; \ - int i; - -#define AVIR_RESIZE_PART2 \ - DstLine += DstLineIncr; \ - rpos++; \ - } - - if (FltBank->getOrder() == 1) { - if (ElCount == 1) { - AVIR_RESIZE_PART1 - - fptype sum = 0.0; - - for (i = 0; i < IntFltLen; i++) { - sum += (ftp[i] + ftp2[i] * x) * Src[i]; - } - - DstLine[0] = sum; - - AVIR_RESIZE_PART2 - } else if (ElCount == 
4) { - AVIR_RESIZE_PART1 - - fptype sum[4]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - sum[3] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i] + ftp2[i] * x; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; - sum[3] += xx * Src[3]; - Src += 4; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - DstLine[3] = sum[3]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 3) { - AVIR_RESIZE_PART1 - - fptype sum[3]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i] + ftp2[i] * x; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; - Src += 3; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 2) { - AVIR_RESIZE_PART1 - - fptype sum[2]; - sum[0] = 0.0; - sum[1] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i] + ftp2[i] * x; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - Src += 2; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - - AVIR_RESIZE_PART2 - } - } else { - if (ElCount == 1) { - AVIR_RESIZE_PART1nx - - fptype sum = 0.0; - - for (i = 0; i < IntFltLen; i++) { - sum += ftp[i] * Src[i]; - } - - DstLine[0] = sum; - - AVIR_RESIZE_PART2 - } else if (ElCount == 4) { - AVIR_RESIZE_PART1nx - - fptype sum[4]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - sum[3] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i]; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; - sum[3] += xx * Src[3]; - Src += 4; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - DstLine[3] = sum[3]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 3) { - AVIR_RESIZE_PART1nx - - fptype sum[3]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i]; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; 
- Src += 3; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 2) { - AVIR_RESIZE_PART1nx - - fptype sum[2]; - sum[0] = 0.0; - sum[1] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i]; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - Src += 2; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - - AVIR_RESIZE_PART2 - } - } - } -#undef AVIR_RESIZE_PART2 -#undef AVIR_RESIZE_PART1nx -#undef AVIR_RESIZE_PART1 -}; - -/** - * @brief Image resizer's default dithering class. - * - * This class defines an object that performs rounding, clipping and dithering - * operations over horizontal scanline pixels before scanline is stored in the - * output buffer. - * - * The ditherer should expect the same storage order of the pixels in a - * scanline as used in the "filtering step" class. So, a separate ditherer - * class should be defined for each scanline pixel storage style. The default - * ditherer implements a simple rounding without dithering: it can be used for - * an efficient dithering method which can be multi-threaded. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types can be used. - */ - -template -class CImageResizerDithererDefINL { - public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. - */ - - void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul, - const double aPkOut) { - Len = aLen; - Vars = &aVars; - LenE = aLen * Vars->ElCount; - TrMul0 = aTrMul; - PkOut0 = aPkOut; - } - - /** - * @return "True" if dithering is recursive relative to scanlines meaning - * multi-threaded execution is not supported by this dithering method. 
- */ - - static bool isRecursive() { return (false); } - - /** - * Function performs rounding and clipping operations. - * - * @param ResScanline The buffer containing the final scanline. - */ - - void dither(fptype* const ResScanline) const { - const fptype c0 = 0.0; - const fptype PkOut = (fptype)PkOut0; - int j; - - if (TrMul0 == 1.0) { - // Optimization - do not perform bit depth truncation. - - for (j = 0; j < LenE; j++) { - ResScanline[j] = clamp(round(ResScanline[j]), c0, PkOut); - } - } else { - const fptype TrMul = (fptype)TrMul0; - - for (j = 0; j < LenE; j++) { - const fptype z0 = round(ResScanline[j] / TrMul) * TrMul; - ResScanline[j] = clamp(z0, c0, PkOut); - } - } - } - - protected: - int Len; ///< Scanline's length in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int LenE; ///< = LenE * ElCount. - ///< - double TrMul0; ///< Bit-depth truncation multiplier. - ///< - double PkOut0; ///< Peak output value allowed. - ///< -}; - -/** - * @brief Image resizer's error-diffusion dithering class, interleaved mode. - * - * This ditherer implements error-diffusion dithering which looks good, and - * whose results are compressed by PNG well. This implementation uses - * weighting coefficients obtained via machine optimization and visual - * evaluation. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types can be used. - */ - -template -class CImageResizerDithererErrdINL - : public CImageResizerDithererDefINL { - public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. 
- */ - - void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul, - const double aPkOut) { - CImageResizerDithererDefINL::init(aLen, aVars, aTrMul, aPkOut); - - ResScanlineDith0.alloc(LenE + Vars->ElCount, sizeof(fptype)); - ResScanlineDith = ResScanlineDith0 + Vars->ElCount; - int i; - - for (i = 0; i < LenE + Vars->ElCount; i++) { - ResScanlineDith0[i] = 0.0; - } - } - - static bool isRecursive() { return (true); } - - void dither(fptype* const ResScanline) { - const int ElCount = Vars->ElCount; - const fptype c0 = 0.0; - const fptype TrMul = (fptype)TrMul0; - const fptype PkOut = (fptype)PkOut0; - int j; - - for (j = 0; j < LenE; j++) { - ResScanline[j] += ResScanlineDith[j]; - ResScanlineDith[j] = 0.0; - } - - for (j = 0; j < LenE - ElCount; j++) { - // Perform rounding, noise estimation and saturation. - - const fptype z0 = round(ResScanline[j] / TrMul) * TrMul; - const fptype Noise = ResScanline[j] - z0; - ResScanline[j] = clamp(z0, c0, PkOut); - - ResScanline[j + ElCount] += Noise * (fptype)0.364842; - ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305; - ResScanlineDith[j] += Noise * (fptype)0.364842; - ResScanlineDith[j + ElCount] += Noise * (fptype)0.063011; - } - - while (j < LenE) { - const fptype z0 = round(ResScanline[j] / TrMul) * TrMul; - const fptype Noise = ResScanline[j] - z0; - ResScanline[j] = clamp(z0, c0, PkOut); - - ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305; - ResScanlineDith[j] += Noise * (fptype)0.364842; - j++; - } - } - - protected: - using CImageResizerDithererDefINL::Len; - using CImageResizerDithererDefINL::Vars; - using CImageResizerDithererDefINL::LenE; - using CImageResizerDithererDefINL::TrMul0; - using CImageResizerDithererDefINL::PkOut0; - - CBuffer ResScanlineDith0; ///< Error diffusion buffer. - ///< - fptype* ResScanlineDith; ///< Error diffusion buffer pointer which skips - ///< the first ElCount elements. 
- ///< -}; - -/** - * @brief Floating-point processing definition and abstraction class. - * - * This class defines several constants and typedefs that point to classes - * that should be used by the image resizing algorithm. Such "definition - * class" can be used to define alternative scanline processing algorithms - * (e.g. SIMD) and image scanline packing styles used during processing. This - * class also offers an abstraction layer for dithering, rounding and - * clamping (saturation) operation. - * - * The fpclass_def class can be used to define processing using both SIMD and - * non-SIMD types, but using algorithms that are operate on interleaved pixels - * and non-SIMD optimized themselves. - * - * @tparam afptype Floating point type to use for storing intermediate data - * and variables. For variables that are not used in intensive calculations - * the "double" type is always used. On the latest Intel processors (like - * i7-4770K) there is almost no performance difference between "double" and - * "float". Image quality differences between "double" and "float" are not - * apparent on 8-bit images. At the same time the "float" uses half amount of - * working memory the "double" type uses. SIMD types can be used. The - * functions round() and clamp() in the "avir" or other visible namespace - * should be available for the specified type. SIMD types allow to perform - * resizing of images with more than 4 channels, to be exact 4 * SIMD element - * number (e.g. 16 for float4), without modification of the image resizing - * algorithm required. - * @tparam afptypeatom The atomic type the "afptype" consists of. - * @tparam adith Ditherer class to use during processing. - */ - -template > -class fpclass_def { - public: - typedef afptype fptype; ///< Floating-point type to use during processing. - ///< - typedef afptypeatom fptypeatom; ///< Atomic type "fptype" consists of. 
- ///< - static const int fppack = - sizeof(fptype) / - sizeof(fptypeatom); ///< - ///< The number of atomic types stored in a single - ///< "fptype" element. - ///< - static const int fpalign = - sizeof(fptype); ///< Suggested alignment size - ///< in bytes. This is not a required alignment, because - ///< image resizing algorithm cannot be made to have a - ///< strictly aligned data access at all steps (e.g. - ///< interpolation cannot perform aligned accesses). - ///< - static const int elalign = - 1; ///< Length alignment of arrays of elements. - ///< This applies to filters and intermediate buffers: this constant - ///< forces filters and scanlines to have a length which is a multiple - ///< of this value, for more efficient SIMD implementation. - ///< - static const int packmode = 0; ///< 0 if interleaved packing, 1 if - ///< de-interleaved. - ///< - typedef CImageResizerFilterStepINL - CFilterStep; ///< - ///< Filtering step class to use during processing. - ///< - typedef adith CDitherer; ///< Ditherer class to use during processing. - ///< -}; - -/** - * @brief Image resizer class. - * - * The object of this class can be used to resize 1-4 channel images to any - * required size. Resizing is performed by utilizing interpolated sinc - * fractional delay filters plus (if necessary) a cascade of built-in - * sinc function-based 2X upsampling or 2X downsampling stages, followed by a - * correction filtering. - * - * Object of this class can be allocated on stack. - * - * @tparam fpclass Floating-point processing definition class to use. See - * avir::fpclass_def for more details. - */ - -template > -class CImageResizer { - public: - /** - * Constructor initializes the resizer. - * - * @param aResBitDepth Required bit depth of resulting image (1-16). If - * integer value output is used (e.g. 
uint8_t), the bit depth also affects - * rounding: for example, if aResBitDepth=6 and "Tout" is uint8_t, the - * result will be rounded to 6 most significant bits (2 least significant - * bits truncated, with dithering applied). - * @param aSrcBitDepth Source image's real bit-depth. Set to 0 to use - * aResBitDepth. - * @param aParams Resizing algorithm's parameters to use. Leave out for - * default values. Can be useful when performing automatic optimization of - * parameters. - */ - - CImageResizer(const int aResBitDepth = 8, const int aSrcBitDepth = 0, - const CImageResizerParams& aParams = CImageResizerParamsDef()) - : Params(aParams), ResBitDepth(aResBitDepth) { - SrcBitDepth = (aSrcBitDepth == 0 ? ResBitDepth : aSrcBitDepth); - - initFilterBank(FixedFilterBank, 1.0, false, CFltBuffer()); - FixedFilterBank.createAllFilters(); - } - - /** - * Function resizes image. - * - * @param SrcBuf Source image buffer. - * @param SrcWidth Source image width. - * @param SrcHeight Source image height. - * @param SrcScanlineSize Physical size of source scanline in elements - * (not bytes). If this value is below 1, SrcWidth * ElCountIO will be - * used as the physical source scanline size. - * @param[out] NewBuf Buffer to accept the resized image. Can be equal to - * SrcBuf if the size of the resized image is smaller or equal to source - * image in size. - * @param NewWidth New image width. - * @param NewHeight New image height. - * @param ElCountIO The number of elements (channels) used to store each - * source and destination pixel (1-4). - * @param k Resizing step (one output pixel corresponds to "k" input - * pixels). A downsizing factor if > 1.0; upsizing factor if <= 1.0. - * Multiply by -1 if you would like to bypass "ox" and "oy" adjustment - * which is done by default to produce a centered image. If step value - * equals 0, the step value will be chosen automatically and independently - * for horizontal and vertical resizing. 
- * @param[in,out] aVars Pointer to variables structure to be passed to the - * image resizing function. Can be NULL. Only variables that are - * initialized in default constructor of this structure are accepted by - * this function. These variables will not be changed by this function. - * All other variables can be modified by this function. The access to - * this object is not thread-safe, each concurrent instance of this - * function should use a separate aVars object. - * @tparam Tin Input buffer element's type. Can be uint8_t (0-255 value - * range), uint16_t (0-65535 value range), float (0.0-1.0 value range), - * double (0.0-1.0 value range). Larger integer types are treated as - * uint16_t. Signed integer types are unsupported. - * @tparam Tout Output buffer element's type. Can be uint8_t (0-255 value - * range), uint16_t (0-65535 value range), float (0.0-1.0 value range), - * double (0.0-1.0 value range). Larger integer types are treated as - * uint16_t. Signed integer types are unsupported. - */ - - template - void resizeImage(const Tin* const SrcBuf, const int SrcWidth, - const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf, - const int NewWidth, const int NewHeight, const int ElCountIO, - const double k, - CImageResizerVars* const aVars = NULL) const { - if (SrcWidth == 0 || SrcHeight == 0) { - memset(NewBuf, 0, (size_t)NewWidth * NewHeight * sizeof(Tout)); - - return; - } else if (NewWidth == 0 || NewHeight == 0) { - return; - } - - CImageResizerVars DefVars; - CImageResizerVars& Vars = (aVars == NULL ? DefVars : *aVars); - - CImageResizerThreadPool DefThreadPool; - CImageResizerThreadPool& ThreadPool = - (Vars.ThreadPool == NULL ? DefThreadPool : *Vars.ThreadPool); - - // Define resizing steps, also optionally modify offsets so that - // resizing produces a "centered" image. 
- - double kx; - double ky; - double ox = Vars.ox; - double oy = Vars.oy; - - if (k == 0.0) { - if (NewWidth > SrcWidth) { - kx = (double)(SrcWidth - 1) / (NewWidth - 1); - } else { - kx = (double)SrcWidth / NewWidth; - ox += (kx - 1.0) * 0.5; - } - - if (NewHeight > SrcHeight) { - ky = (double)(SrcHeight - 1) / (NewHeight - 1); - } else { - ky = (double)SrcHeight / NewHeight; - oy += (ky - 1.0) * 0.5; - } - } else if (k > 0.0) { - kx = k; - ky = k; - - if (k > 1.0) { - const double ko = (k - 1.0) * 0.5; - ox += ko; - oy += ko; - } - } else { - kx = -k; - ky = -k; - } - - // Evaluate pre-multipliers used on the output stage. - - const bool IsInFloat = ((Tin)0.4 != 0); - const bool IsOutFloat = ((Tout)0.4 != 0); - double OutMul; // Output multiplier. - - if (Vars.UseSRGBGamma) { - if (IsInFloat) { - Vars.InGammaMult = 1.0; - } else { - Vars.InGammaMult = 1.0 / (sizeof(Tin) == 1 ? 255.0 : 65535.0); - } - - if (IsOutFloat) { - Vars.OutGammaMult = 1.0; - } else { - Vars.OutGammaMult = (sizeof(Tout) == 1 ? 255.0 : 65535.0); - } - - OutMul = 1.0; - } else { - if (IsOutFloat) { - OutMul = 1.0; - } else { - OutMul = (sizeof(Tout) == 1 ? 255.0 : 65535.0); - } - - if (!IsInFloat) { - OutMul /= (sizeof(Tin) == 1 ? 255.0 : 65535.0); - } - } - - // Fill widely-used variables. - - const int ElCount = (ElCountIO + fpclass ::fppack - 1) / fpclass ::fppack; - - const int NewWidthE = NewWidth * ElCount; - - if (SrcScanlineSize < 1) { - SrcScanlineSize = SrcWidth * ElCountIO; - } - - Vars.ElCount = ElCount; - Vars.ElCountIO = ElCountIO; - Vars.fppack = fpclass ::fppack; - Vars.fpalign = fpclass ::fpalign; - Vars.elalign = fpclass ::elalign; - Vars.packmode = fpclass ::packmode; - - // Horizontal scanline filtering and resizing. 
- - CDSPFracFilterBankLin FltBank; - CFilterSteps FltSteps; - typename CFilterStep ::CRPosBufArray RPosBufArray; - CBuffer UsedFracMap; - - // Perform the filtering steps modeling at various modes, find the - // most efficient mode for both horizontal and vertical resizing. - - int UseBuildMode = 1; - const int BuildModeCount = (FixedFilterBank.getOrder() == 0 ? 4 : 2); - - int m; - - if (Vars.BuildMode >= 0) { - UseBuildMode = Vars.BuildMode; - } else { - int BestScore = 0x7FFFFFFF; - - for (m = 0; m < BuildModeCount; m++) { - CDSPFracFilterBankLin TmpBank; - CFilterSteps TmpSteps; - Vars.k = kx; - Vars.o = ox; - buildFilterSteps(TmpSteps, Vars, TmpBank, OutMul, m, true); - updateFilterStepBuffers(TmpSteps, Vars, RPosBufArray, SrcWidth, - NewWidth); - - fillUsedFracMap(TmpSteps[Vars.ResizeStep], UsedFracMap); - const int c = calcComplexity(TmpSteps, Vars, UsedFracMap, SrcHeight); - - if (c < BestScore) { - UseBuildMode = m; - BestScore = c; - } - } - } - - // Perform the actual filtering steps building. - - Vars.k = kx; - Vars.o = ox; - buildFilterSteps(FltSteps, Vars, FltBank, OutMul, UseBuildMode, false); - - updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcWidth, NewWidth); - - updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth); - - const int ThreadCount = ThreadPool.getSuggestedWorkloadCount(); - // Includes the current thread. - - CStructArray > td; - td.setItemCount(ThreadCount); - int i; - - for (i = 0; i < ThreadCount; i++) { - if (i > 0) { - ThreadPool.addWorkload(&td[i]); - } - - td[i].init(i, ThreadCount, FltSteps, Vars); - - td[i].initScanlineQueue(td[i].sopResizeH, SrcHeight, SrcWidth); - } - - CBuffer FltBuf( - (size_t)NewWidthE * SrcHeight, - fpclass ::fpalign); // Temporary buffer that receives - // horizontally-filtered and resized image. 
- - for (i = 0; i < SrcHeight; i++) { - td[i % ThreadCount].addScanlineToQueue( - (void*)&SrcBuf[(size_t)i * SrcScanlineSize], - &FltBuf[(size_t)i * NewWidthE]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - - // Vertical scanline filtering and resizing, reuse previously defined - // filtering steps if possible. - - const int PrevUseBuildMode = UseBuildMode; - - if (Vars.BuildMode >= 0) { - UseBuildMode = Vars.BuildMode; - } else { - CImageResizerVars TmpVars(Vars); - int BestScore = 0x7FFFFFFF; - - for (m = 0; m < BuildModeCount; m++) { - CDSPFracFilterBankLin TmpBank; - TmpBank.copyInitParams(FltBank); - CFilterSteps TmpSteps; - TmpVars.k = ky; - TmpVars.o = oy; - buildFilterSteps(TmpSteps, TmpVars, TmpBank, 1.0, m, true); - updateFilterStepBuffers(TmpSteps, TmpVars, RPosBufArray, SrcHeight, - NewHeight); - - fillUsedFracMap(TmpSteps[TmpVars.ResizeStep], UsedFracMap); - - const int c = calcComplexity(TmpSteps, TmpVars, UsedFracMap, NewWidth); - - if (c < BestScore) { - UseBuildMode = m; - BestScore = c; - } - } - } - - Vars.k = ky; - Vars.o = oy; - - if (UseBuildMode == PrevUseBuildMode && ky == kx) { - if (OutMul != 1.0) { - modifyCorrFilterDCGain(FltSteps, 1.0 / OutMul); - } - } else { - buildFilterSteps(FltSteps, Vars, FltBank, 1.0, UseBuildMode, false); - } - - updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcHeight, NewHeight); - - updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth); - - if (IsOutFloat && sizeof(FltBuf[0]) == sizeof(Tout) && - fpclass ::packmode == 0) { - // In-place output. 
- - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight, - NewWidthE, NewWidthE); - } - - for (i = 0; i < NewWidth; i++) { - td[i % ThreadCount].addScanlineToQueue( - &FltBuf[(size_t)i * ElCount], - (fptype*)&NewBuf[(size_t)i * ElCount]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - ThreadPool.removeAllWorkloads(); - - return; - } - - CBuffer ResBuf((size_t)NewWidthE * NewHeight, - fpclass ::fpalign); - - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight, NewWidthE, - NewWidthE); - } - - const int im = (fpclass ::packmode == 0 ? ElCount : 1); - - for (i = 0; i < NewWidth; i++) { - td[i % ThreadCount].addScanlineToQueue(&FltBuf[(size_t)i * im], - &ResBuf[(size_t)i * im]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - - if (IsOutFloat) { - // Perform output, but skip dithering. - - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopUnpackH, NewHeight, NewWidth); - } - - for (i = 0; i < NewHeight; i++) { - td[i % ThreadCount].addScanlineToQueue( - &ResBuf[(size_t)i * NewWidthE], - &NewBuf[(size_t)i * NewWidth * ElCountIO]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - ThreadPool.removeAllWorkloads(); - - return; - } - - // Perform output with dithering (for integer output only). - - int TruncBits; // The number of lower bits to truncate and dither. - int OutRange; // Output range. - - if (sizeof(Tout) == 1) { - TruncBits = 8 - ResBitDepth; - OutRange = 255; - } else { - TruncBits = 16 - ResBitDepth; - OutRange = 65535; - } - - const double PkOut = OutRange; - const double TrMul = - (TruncBits > 0 ? 
PkOut / (OutRange >> TruncBits) : 1.0); - - if (CDitherer ::isRecursive()) { - td[0].getDitherer().init(NewWidth, Vars, TrMul, PkOut); - - if (Vars.UseSRGBGamma) { - for (i = 0; i < NewHeight; i++) { - fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE]; - - CFilterStep ::applySRGBGamma(ResScanline, NewWidth, Vars); - - td[0].getDitherer().dither(ResScanline); - - CFilterStep ::unpackScanline( - ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth, - Vars); - } - } else { - for (i = 0; i < NewHeight; i++) { - fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE]; - - td[0].getDitherer().dither(ResScanline); - - CFilterStep ::unpackScanline( - ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth, - Vars); - } - } - } else { - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopDitherAndUnpackH, NewHeight, NewWidth); - - td[i].getDitherer().init(NewWidth, Vars, TrMul, PkOut); - } - - for (i = 0; i < NewHeight; i++) { - td[i % ThreadCount].addScanlineToQueue( - &ResBuf[(size_t)i * NewWidthE], - &NewBuf[(size_t)i * NewWidth * ElCountIO]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - } - - ThreadPool.removeAllWorkloads(); - } - - private: - typedef typename fpclass ::fptype fptype; ///< Floating-point type to use - ///< during processing. - ///< - typedef typename fpclass ::CFilterStep - CFilterStep; ///< Filtering step - ///< class to use during processing. - ///< - typedef typename fpclass ::CDitherer CDitherer; ///< Ditherer class to - ///< use during processing. - ///< - CImageResizerParams Params; ///< Algorithm's parameters currently in use. - ///< - int SrcBitDepth; ///< Bit resolution of the source image. - ///< - int ResBitDepth; ///< Bit resolution of the resulting image. - ///< - CDSPFracFilterBankLin - FixedFilterBank; ///< Fractional delay - ///< filter bank with fixed characteristics, mainly for - ///< upsizing cases. 
- ///< - - /** - * @brief Filtering steps array. - * - * The object of this class stores filtering steps together. - */ - - typedef CStructArray CFilterSteps; - - /** - * Function initializes the filter bank in the specified resizing step - * according to the source and resulting image bit depths. - * - * @param FltBank Filter bank to initialize. - * @param CutoffMult Cutoff multiplier, 0 to 1. 1 corresponds to 0.5pi - * cutoff point. - * @param ForceHiOrder "True" if a high-order interpolation should be - * forced which requires considerably less resources for initialization. - * @param ExtFilter External filter to apply to interpolation filter. - */ - - void initFilterBank(CDSPFracFilterBankLin& FltBank, - const double CutoffMult, const bool ForceHiOrder, - const CFltBuffer& ExtFilter) const { - const int IntBitDepth = - (ResBitDepth > SrcBitDepth ? ResBitDepth : SrcBitDepth); - - const double SNR = -6.02 * (IntBitDepth + 3); - int UseOrder; - int FracCount; // The number of fractional delay filters sampled by - // the filter bank. This variable affects the - // signal-to-noise ratio at interpolation stage. - // Theoretically, at UseOrder==1, 8-bit image resizing - // requires 66.2 dB SNR or 11. 16-bit resizing requires - // 114.4 dB SNR or 150. At UseOrder=0 the required number of - // filters is exponentially higher. - - if (ForceHiOrder || IntBitDepth > 8) { - UseOrder = 1; // -146 dB max - FracCount = (int)ceil(0.23134052 * exp(-0.058062929 * SNR)); - } else { - UseOrder = 0; // -72 dB max - FracCount = (int)ceil(0.33287686 * exp(-0.11334583 * SNR)); - } - - if (FracCount < 2) { - FracCount = 2; - } - - FltBank.init(FracCount, UseOrder, Params.IntFltLen / CutoffMult, - Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha, - ExtFilter, fpclass ::fpalign, fpclass ::elalign); - } - - /** - * Function allocates filter buffer taking "fpclass" alignments into - * account. 
The allocated buffer may be larger than the requested size: in - * this case the additional elements will be zeroed by this function. - * - * @param Flt Filter buffer. - * @param ReqCapacity The required filter buffer's capacity. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter allocation. - * @param FltExt If non-NULL this variable will receive the number of - * elements the filter was extended by. - */ - - static void allocFilter(CBuffer& Flt, const int ReqCapacity, - const bool IsModel = false, - int* const FltExt = NULL) { - int UseCapacity = - (ReqCapacity + fpclass ::elalign - 1) & ~(fpclass ::elalign - 1); - - int Ext = UseCapacity - ReqCapacity; - - if (FltExt != NULL) { - *FltExt = Ext; - } - - if (IsModel) { - Flt.forceCapacity(UseCapacity); - return; - } - - Flt.alloc(UseCapacity, fpclass ::fpalign); - - while (Ext > 0) { - Ext--; - Flt[ReqCapacity + Ext] = 0.0; - } - } - - /** - * Function assigns filter parameters to the specified filtering step - * object. - * - * @param fs Filtering step to assign parameter to. This step cannot be - * the last step if ResampleFactor greater than 1 was specified. - * @param IsUpsample "True" if upsampling step. Should be set to "false" - * if FltCutoff is negative. - * @param ResampleFactor Resampling factor of this filter (>=1). - * @param FltCutoff Filter cutoff point. This value will be divided by the - * ResampleFactor if IsUpsample equals "true". If zero value was - * specified, the "half-band" predefined filter will be created. In this - * case the ResampleFactor will modify the filter cutoff point. - * @param DCGain DC gain to apply to the filter. Assigned to filtering - * step's DCGain variable. - * @param UseFltOrig "True" if the originally-designed filter should be - * left in filtering step's FltOrig buffer. Otherwise it will be freed. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. 
- */ - - void assignFilterParams(CFilterStep& fs, const bool IsUpsample, - const int ResampleFactor, const double FltCutoff, - const double DCGain, const bool UseFltOrig, - const bool IsModel) const { - double FltAlpha; - double Len2; - double Freq; - - if (FltCutoff == 0.0) { - const double m = 2.0 / ResampleFactor; - FltAlpha = Params.HBFltAlpha; - Len2 = 0.5 * Params.HBFltLen / m; - Freq = AVIR_PI * Params.HBFltCutoff * m; - } else { - FltAlpha = Params.LPFltAlpha; - Len2 = 0.25 * Params.LPFltBaseLen / FltCutoff; - Freq = AVIR_PI * Params.LPFltCutoffMult * FltCutoff; - } - - if (IsUpsample) { - Len2 *= ResampleFactor; - Freq /= ResampleFactor; - fs.DCGain = DCGain * ResampleFactor; - } else { - fs.DCGain = DCGain; - } - - fs.FltOrig.Len2 = Len2; - fs.FltOrig.Freq = Freq; - fs.FltOrig.Alpha = FltAlpha; - fs.FltOrig.DCGain = fs.DCGain; - - CDSPPeakedCosineLPF w(Len2, Freq, FltAlpha); - - fs.IsUpsample = IsUpsample; - fs.ResampleFactor = ResampleFactor; - fs.FltLatency = w.fl2; - - int FltExt; // Filter's extension due to fpclass :: elalign. - - if (IsModel) { - allocFilter(fs.Flt, w.FilterLen, true, &FltExt); - - if (UseFltOrig) { - // Allocate a real buffer even in modeling mode since this - // filter may be copied by the filter bank. 
- - fs.FltOrig.alloc(w.FilterLen); - memset(&fs.FltOrig[0], 0, w.FilterLen * sizeof(fs.FltOrig[0])); - } - } else { - fs.FltOrig.alloc(w.FilterLen); - - w.generateLPF(&fs.FltOrig[0], 1.0); - optimizeFIRFilter(fs.FltOrig, fs.FltLatency); - normalizeFIRFilter(&fs.FltOrig[0], fs.FltOrig.getCapacity(), fs.DCGain); - - allocFilter(fs.Flt, fs.FltOrig.getCapacity(), false, &FltExt); - copyArray(&fs.FltOrig[0], &fs.Flt[0], fs.FltOrig.getCapacity()); - - if (!UseFltOrig) { - fs.FltOrig.free(); - } - } - - if (IsUpsample) { - int l = fs.Flt.getCapacity() - fs.FltLatency - ResampleFactor - FltExt; - - allocFilter(fs.PrefixDC, l, IsModel); - allocFilter(fs.SuffixDC, fs.FltLatency, IsModel); - - if (IsModel) { - return; - } - - // Create prefix and suffix "tails" used during upsampling. - - const fptype* ip = &fs.Flt[fs.FltLatency + ResampleFactor]; - copyArray(ip, &fs.PrefixDC[0], l); - - while (true) { - ip += ResampleFactor; - l -= ResampleFactor; - - if (l <= 0) { - break; - } - - addArray(ip, &fs.PrefixDC[0], l); - } - - l = fs.FltLatency; - fptype* op = &fs.SuffixDC[0]; - copyArray(&fs.Flt[0], op, l); - - while (true) { - op += ResampleFactor; - l -= ResampleFactor; - - if (l <= 0) { - break; - } - - addArray(&fs.Flt[0], op, l); - } - } else if (!UseFltOrig) { - fs.EdgePixelCount = fs.EdgePixelCountDef; - } - } - - /** - * Function adds a correction filter that tries to achieve a linear - * frequency response at all frequencies. The actual resulting response - * may feature a slight damping of the highest frequencies since a - * suitably short correction filter cannot fix steep high-frequency - * damping. - * - * This function assumes that the resizing step is currently the last - * step, even if it was not inserted yet: this allows placement of the - * correction filter both before and after the resizing step. - * - * @param Steps Filtering steps. - * @param bw Resulting bandwidth relative to the original bandwidth (which - * is 1.0), usually 1/k. Should be <= 1.0. 
- * @param IsPreCorrection "True" if the filtering step was already created - * and it is first in the Steps array. "True" also adds edge pixels to - * reduce edge artifacts. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - void addCorrectionFilter(CFilterSteps& Steps, const double bw, - const bool IsPreCorrection, - const bool IsModel) const { - CFilterStep& fs = (IsPreCorrection ? Steps[0] : Steps.add()); - fs.IsUpsample = false; - fs.ResampleFactor = 1; - fs.DCGain = 1.0; - fs.EdgePixelCount = (IsPreCorrection ? fs.EdgePixelCountDef : 0); - - if (IsModel) { - allocFilter( - fs.Flt, - CDSPFIREQ ::calcFilterLength(Params.CorrFltLen, fs.FltLatency), true); - - return; - } - - const int BinCount = 65; // Frequency response bins to control. - const int BinCount1 = BinCount - 1; - double curbw = 1.0; // Bandwidth of the filter at the current step. - int i; - int j; - double re; - double im; - - CBuffer Bins(BinCount); // Adjustment introduced by all - // steps at all frequencies of interest. - - for (j = 0; j < BinCount; j++) { - Bins[j] = 1.0; - } - - const int si = (IsPreCorrection ? 1 : 0); - - for (i = si; i < Steps.getItemCount() - (si ^ 1); i++) { - const CFilterStep& fs = Steps[i]; - - if (fs.IsUpsample) { - curbw *= fs.ResampleFactor; - - if (fs.FltOrig.getCapacity() > 0) { - continue; - } - } - - const double dcg = 1.0 / fs.DCGain; // DC gain correction. - const fptype* Flt; - int FltLen; - - if (fs.ResampleFactor == 0) { - Flt = fs.FltBank->getFilter(0); - FltLen = fs.FltBank->getFilterLen(); - } else { - Flt = &fs.Flt[0]; - FltLen = fs.Flt.getCapacity(); - } - - // Calculate frequency response adjustment introduced by the - // filter at this step, within the bounds of bandwidth of - // interest. 
- - for (j = 0; j < BinCount; j++) { - const double th = AVIR_PI * bw / curbw * j / BinCount1; - - calcFIRFilterResponse(Flt, FltLen, th, re, im); - - Bins[j] /= sqrt(re * re + im * im) * dcg; - } - - if (!fs.IsUpsample && fs.ResampleFactor > 1) { - curbw /= fs.ResampleFactor; - } - } - - // Calculate filter. - - CDSPFIREQ EQ; - EQ.init(bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false, - Params.CorrFltAlpha); - - fs.FltLatency = EQ.getFilterLatency(); - - CBuffer Filter(EQ.getFilterLength()); - EQ.buildFilter(Bins, &Filter[0]); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - optimizeFIRFilter(Filter, fs.FltLatency); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - - allocFilter(fs.Flt, Filter.getCapacity()); - copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity()); - - // Print a theoretically achieved final frequency response at various - // feature sizes (from DC to 1 pixel). Values above 255 means features - // become brighter, values below 255 means features become dimmer. - - /* const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 ); - - for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * sbw * j / BinCount1; - - calcFIRFilterResponse( &fs.Flt[ 0 ], - fs.Flt.getCapacity(), th, re, im ); - - printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j - ] * 255 ); - } - - printf( "***\n" );*/ - } - - /** - * Function adds a sharpening filter if image is being upsized. Such - * sharpening allows to spot interpolation filter's stop-band attenuation: - * if attenuation is too weak, a "dark grid" and other artifacts may - * become visible. - * - * It is assumed that 40 decibel stop-band attenuation should be - * considered a required minimum: this allows application of (deliberately - * strong) 64X sharpening without spotting any artifacts. - * - * @param Steps Filtering steps. - * @param bw Resulting bandwidth relative to the original bandwidth (which - * is 1.0), usually 1/k. 
- * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - static void addSharpenTest(CFilterSteps& Steps, const double bw, - const bool IsModel) { - if (bw <= 1.0) { - return; - } - - const double FltLen = 10.0 * bw; - - CFilterStep& fs = Steps.add(); - fs.IsUpsample = false; - fs.ResampleFactor = 1; - fs.DCGain = 1.0; - fs.EdgePixelCount = 0; - - if (IsModel) { - allocFilter(fs.Flt, CDSPFIREQ ::calcFilterLength(FltLen, fs.FltLatency), - true); - - return; - } - - const int BinCount = 200; - CBuffer Bins(BinCount); - int Thresh = (int)round(BinCount / bw * 1.75); - - if (Thresh > BinCount) { - Thresh = BinCount; - } - - int j; - - for (j = 0; j < Thresh; j++) { - Bins[j] = 1.0; - } - - for (j = Thresh; j < BinCount; j++) { - Bins[j] = 256.0; - } - - CDSPFIREQ EQ; - EQ.init(bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7); - - fs.FltLatency = EQ.getFilterLatency(); - - CBuffer Filter(EQ.getFilterLength()); - EQ.buildFilter(Bins, &Filter[0]); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - optimizeFIRFilter(Filter, fs.FltLatency); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - - allocFilter(fs.Flt, Filter.getCapacity()); - copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity()); - - /* for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * j / ( BinCount - 1 ); - double re; - double im; - - calcFIRFilterResponse( &fs.Flt[ 0 ], - fs.Flt.getCapacity(), th, re, im ); - - printf( "%f\n", sqrt( re * re + im * im )); - } - - printf( "***\n" );*/ - } - - /** - * Function builds sequence of filtering steps depending on the specified - * resizing coefficient. The last steps included are always the resizing - * step then (possibly) the correction step. - * - * @param Steps Array that receives filtering steps. - * @param[out] Vars Variables object. - * @param FltBank Filter bank to initialize and use. - * @param DCGain The overall DC gain to apply. 
This DC gain is applied to - * the first filtering step only (upsampling or filtering step). - * @param ModeFlags Build mode flags to use. This is a bitmap of switches - * that enable or disable certain algorithm features. - * @param IsModel "True" if filtering steps modeling is performed without - * the actual filter allocation and building. - */ - - void buildFilterSteps(CFilterSteps& Steps, CImageResizerVars& Vars, - CDSPFracFilterBankLin& FltBank, - const double DCGain, const int ModeFlags, - const bool IsModel) const { - Steps.clear(); - - const bool DoFltAndIntCombo = - ((ModeFlags & 1) != 0); // Do filter - // and interpolator combining. - const bool ForceHiOrderInt = - ((ModeFlags & 2) != 0); // Force use - // of a higher-order interpolation. - const bool UseHalfband = ((ModeFlags & 4) != 0); // Use half-band - // filter. - - const double bw = 1.0 / Vars.k; // Resulting bandwidth. - const int UpsampleFactor = ((int)floor(Vars.k) < 2 ? 2 : 1); - double IntCutoffMult; // Interpolation filter cutoff multiplier. - CFilterStep* ReuseStep; // If not NULL, resizing step should use - // this step object instead of creating a new one. - CFilterStep* ExtFltStep; // Use FltOrig of this step as the external - // filter to applied to the interpolator. - bool IsPreCorrection; // "True" if the correction filter is applied - // first. - double FltCutoff; // Cutoff frequency of the first filtering step. - double corrbw; ///< Bandwidth at the correction step. - - if (Vars.k <= 1.0) { - IsPreCorrection = true; - FltCutoff = 1.0; - corrbw = 1.0; - Steps.add(); - } else { - IsPreCorrection = false; - FltCutoff = bw; - corrbw = bw; - } - - // Add 1 upsampling or several downsampling filters. - - if (UpsampleFactor > 1) { - CFilterStep& fs = Steps.add(); - assignFilterParams(fs, true, UpsampleFactor, FltCutoff, DCGain, - DoFltAndIntCombo, IsModel); - - IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor; - ReuseStep = NULL; - ExtFltStep = (DoFltAndIntCombo ? 
&fs : NULL); - } else { - int DownsampleFactor; - - while (true) { - DownsampleFactor = (int)floor(0.5 / FltCutoff); - bool DoHBFltAdd; - - if (DownsampleFactor > 16) { - // Add half-band filter unconditionally in order to keep - // filter lengths lower for more precise frequency - // response and less edge artifacts. - - DoHBFltAdd = true; - DownsampleFactor = 16; - } else { - DoHBFltAdd = (UseHalfband && DownsampleFactor > 1); - } - - if (DoHBFltAdd) { - assignFilterParams(Steps.add(), false, DownsampleFactor, 0.0, 1.0, - false, IsModel); - - FltCutoff *= DownsampleFactor; - } else { - if (DownsampleFactor < 1) { - DownsampleFactor = 1; - } - - break; - } - } - - CFilterStep& fs = Steps.add(); - assignFilterParams(fs, false, DownsampleFactor, FltCutoff, DCGain, - DoFltAndIntCombo, IsModel); - - IntCutoffMult = FltCutoff / 0.5; - - if (DoFltAndIntCombo) { - ReuseStep = &fs; - ExtFltStep = &fs; - } else { - IntCutoffMult *= DownsampleFactor; - ReuseStep = NULL; - ExtFltStep = NULL; - } - } - - // Insert resizing and correction steps. - - CFilterStep& fs = (ReuseStep == NULL ? Steps.add() : *ReuseStep); - - Vars.ResizeStep = Steps.getItemCount() - 1; - fs.IsUpsample = false; - fs.ResampleFactor = 0; - fs.DCGain = (ExtFltStep == NULL ? 1.0 : ExtFltStep->DCGain); - - initFilterBank(FltBank, IntCutoffMult, ForceHiOrderInt, - (ExtFltStep == NULL ? fs.FltOrig : ExtFltStep->FltOrig)); - - if (FltBank == FixedFilterBank) { - fs.FltBank = (CDSPFracFilterBankLin*)&FixedFilterBank; - } else { - fs.FltBank = &FltBank; - } - - addCorrectionFilter(Steps, corrbw, IsPreCorrection, IsModel); - - // addSharpenTest( Steps, bw, IsModel ); - } - - /** - * Function extends *this upsampling step so that it produces more - * upsampled pixels that cover the prefix and suffix needs of the next - * step. After the call to this function the InPrefix and InSuffix - * variables of the next step will be set to zero. - * - * @param fs Upsampling filtering step. 
- * @param NextStep The next step structure. - */ - - static void extendUpsample(CFilterStep& fs, CFilterStep& NextStep) { - fs.InPrefix = - (NextStep.InPrefix + fs.ResampleFactor - 1) / fs.ResampleFactor; - - fs.OutPrefix += fs.InPrefix * fs.ResampleFactor; - NextStep.InPrefix = 0; - - fs.InSuffix = - (NextStep.InSuffix + fs.ResampleFactor - 1) / fs.ResampleFactor; - - fs.OutSuffix += fs.InSuffix * fs.ResampleFactor; - NextStep.InSuffix = 0; - } - - /** - * Function fills resizing step's RPosBuf array, excluding the actual - * "ftp" pointers and "SrcOffs" offsets. - * - * This array should be cleared if the resizing step or offset were - * changed. Otherwise this function only fills the elements required to - * cover resizing step's OutLen. - * - * This function is called by the updateFilterStepBuffers() function. - * - * @param fs Resizing step. - * @param Vars Variables object. - */ - - static void fillRPosBuf(CFilterStep& fs, const CImageResizerVars& Vars) { - const int PrevLen = fs.RPosBuf->getCapacity(); - - if (fs.OutLen > PrevLen) { - fs.RPosBuf->increaseCapacity(fs.OutLen); - } - - typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[PrevLen]; - const int FracCount = fs.FltBank->getFracCount(); - const double o = Vars.o; - const double k = Vars.k; - int i; - - for (i = PrevLen; i < fs.OutLen; i++) { - const double SrcPos = o + k * i; - const int SrcPosInt = (int)floor(SrcPos); - const double x = (SrcPos - SrcPosInt) * FracCount; - const int fti = (int)x; - rpos->x = (typename fpclass ::fptypeatom)(x - fti); - rpos->fti = fti; - rpos->SrcPosInt = SrcPosInt; - rpos++; - } - } - - /** - * Function updates filtering step buffer lengths depending on the - * specified source and new scanline lengths. This function should be - * called after the buildFilterSteps() function. - * - * @param Steps Array that receives filtering steps. - * @param[out] Vars Variables object, will receive buffer size and length. 
- * This function expects "k" and "o" variable values that will be - * adjusted by this function. - * @param RPosBufArray Resizing position buffers array, used to obtain - * buffer to initialize and use (will be reused if it is already fully or - * partially filled). - * @param SrcLen Source scanline's length in pixels. - * @param NewLen New scanline's length in pixels. - */ - - static void updateFilterStepBuffers( - CFilterSteps& Steps, CImageResizerVars& Vars, - typename CFilterStep ::CRPosBufArray& RPosBufArray, int SrcLen, - const int NewLen) { - int upstep = -1; - int InBuf = 0; - int i; - - for (i = 0; i < Steps.getItemCount(); i++) { - CFilterStep& fs = Steps[i]; - - fs.Vars = &Vars; - fs.InLen = SrcLen; - fs.InBuf = InBuf; - fs.OutBuf = (InBuf + 1) & 1; - - if (fs.IsUpsample) { - upstep = i; - Vars.k *= fs.ResampleFactor; - Vars.o *= fs.ResampleFactor; - fs.InPrefix = 0; - fs.InSuffix = 0; - fs.OutLen = fs.InLen * fs.ResampleFactor; - fs.OutPrefix = fs.FltLatency; - fs.OutSuffix = fs.Flt.getCapacity() - fs.FltLatency - fs.ResampleFactor; - - int l0 = fs.OutPrefix + fs.OutLen + fs.OutSuffix; - int l = fs.InLen * fs.ResampleFactor + fs.SuffixDC.getCapacity(); - - if (l > l0) { - fs.OutSuffix += l - l0; - } - - l0 = fs.OutLen + fs.OutSuffix; - - if (fs.PrefixDC.getCapacity() > l0) { - fs.OutSuffix += fs.PrefixDC.getCapacity() - l0; - } - } else if (fs.ResampleFactor == 0) { - const int FilterLenD2 = fs.FltBank->getFilterLen() / 2; - const int FilterLenD21 = FilterLenD2 - 1; - - const int ResizeLPix = (int)floor(Vars.o) - FilterLenD21; - fs.InPrefix = (ResizeLPix < 0 ? -ResizeLPix : 0); - const int ResizeRPix = - (int)floor(Vars.o + (NewLen - 1) * Vars.k) + FilterLenD2 + 1; - - fs.InSuffix = (ResizeRPix > fs.InLen ? 
ResizeRPix - fs.InLen : 0); - - fs.OutLen = NewLen; - fs.RPosBuf = &RPosBufArray.getRPosBuf(Vars.k, Vars.o, - fs.FltBank->getFracCount()); - - fillRPosBuf(fs, Vars); - } else { - Vars.k /= fs.ResampleFactor; - Vars.o /= fs.ResampleFactor; - Vars.o += fs.EdgePixelCount; - - fs.InPrefix = fs.FltLatency; - fs.InSuffix = fs.Flt.getCapacity() - fs.FltLatency - 1; - - // Additionally extend OutLen to produce more precise edge - // pixels. - - fs.OutLen = (fs.InLen + fs.ResampleFactor - 1) / fs.ResampleFactor + - fs.EdgePixelCount; - - fs.InSuffix += (fs.OutLen - 1) * fs.ResampleFactor + 1 - fs.InLen; - - fs.InPrefix += fs.EdgePixelCount * fs.ResampleFactor; - fs.OutLen += fs.EdgePixelCount; - } - - InBuf = fs.OutBuf; - SrcLen = fs.OutLen; - } - - Steps[Steps.getItemCount() - 1].OutBuf = 2; - - if (upstep != -1) { - extendUpsample(Steps[upstep], Steps[upstep + 1]); - } - } - - /** - * Function calculates an optimal intermediate buffer length that will - * cover all needs of the specified filtering steps. This function should - * be called after the updateFilterStepBuffers() function. - * - * Function also updates resizing step's RPosBuf pointers to the filter - * bank and SrcOffs values. - * - * @param Steps Filtering steps. - * @param[out] Vars Variables object, will receive buffer size and length. - * @param ResElIncr Resulting (final) element increment, used to produce - * de-interleaved result. For horizontal processing this value is equal - * to last step's OutLen, for vertical processing this value is equal to - * resulting image's width. 
- */ - - static void updateBufLenAndRPosPtrs(CFilterSteps& Steps, - CImageResizerVars& Vars, - const int ResElIncr) { - int MaxPrefix[2] = {0, 0}; - int MaxLen[2] = {0, 0}; - int i; - - for (i = 0; i < Steps.getItemCount(); i++) { - CFilterStep& fs = Steps[i]; - const int ib = fs.InBuf; - - if (fs.InPrefix > MaxPrefix[ib]) { - MaxPrefix[ib] = fs.InPrefix; - } - - int l = fs.InLen + fs.InSuffix; - - if (l > MaxLen[ib]) { - MaxLen[ib] = l; - } - - fs.InElIncr = fs.InPrefix + l; - - if (fs.OutBuf == 2) { - break; - } - - const int ob = fs.OutBuf; - - if (fs.IsUpsample) { - if (fs.OutPrefix > MaxPrefix[ob]) { - MaxPrefix[ob] = fs.OutPrefix; - } - - l = fs.OutLen + fs.OutSuffix; - - if (l > MaxLen[ob]) { - MaxLen[ob] = l; - } - } else { - if (fs.OutLen > MaxLen[ob]) { - MaxLen[ob] = fs.OutLen; - } - } - } - - // Update OutElIncr values of all steps. - - for (i = 0; i < Steps.getItemCount(); i++) { - CFilterStep& fs = Steps[i]; - - if (fs.OutBuf == 2) { - fs.OutElIncr = ResElIncr; - break; - } - - CFilterStep& fs2 = Steps[i + 1]; - - if (fs.IsUpsample) { - fs.OutElIncr = fs.OutPrefix + fs.OutLen + fs.OutSuffix; - - if (fs.OutElIncr > fs2.InElIncr) { - fs2.InElIncr = fs.OutElIncr; - } else { - fs.OutElIncr = fs2.InElIncr; - } - } else { - fs.OutElIncr = fs2.InElIncr; - } - } - - // Update temporary buffer's length. - - for (i = 0; i < 2; i++) { - Vars.BufLen[i] = MaxPrefix[i] + MaxLen[i]; - Vars.BufOffs[i] = MaxPrefix[i]; - - if (Vars.packmode == 0) { - Vars.BufOffs[i] *= Vars.ElCount; - } - - Vars.BufLen[i] *= Vars.ElCount; - } - - // Update RPosBuf pointers and SrcOffs. - - CFilterStep& fs = Steps[Vars.ResizeStep]; - typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0]; - const int em = (fpclass ::packmode == 0 ? 
Vars.ElCount : 1); - const int FilterLenD21 = fs.FltBank->getFilterLen() / 2 - 1; - - for (i = 0; i < fs.OutLen; i++) { - rpos->ftp = fs.FltBank->getFilter(rpos->fti); - rpos->SrcOffs = (rpos->SrcPosInt - FilterLenD21) * em; - rpos++; - } - } - - /** - * Function modifies the overall (DC) gain of the correction filter in the - * pre-built filtering steps array. - * - * @param Steps Filtering steps. - * @param m Multiplier to apply to the correction filter. - */ - - void modifyCorrFilterDCGain(CFilterSteps& Steps, const double m) const { - CBuffer* Flt; - const int z = Steps.getItemCount() - 1; - - if (!Steps[z].IsUpsample && Steps[z].ResampleFactor == 1) { - Flt = &Steps[z].Flt; - } else { - Flt = &Steps[0].Flt; - } - - int i; - - for (i = 0; i < Flt->getCapacity(); i++) { - (*Flt)[i] = (fptype)((double)(*Flt)[i] * m); - } - } - - /** - * Function builds a map of used fractional delay filters based on the - * resizing positions buffer. - * - * @param fs Resizing step. - * @param[out] UsedFracMap Map of used fractional delay filters. - */ - - static void fillUsedFracMap(const CFilterStep& fs, - CBuffer& UsedFracMap) { - const int FracCount = fs.FltBank->getFracCount(); - UsedFracMap.increaseCapacity(FracCount, false); - memset(&UsedFracMap[0], 0, FracCount * sizeof(UsedFracMap[0])); - - typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0]; - int i; - - for (i = 0; i < fs.OutLen; i++) { - UsedFracMap[rpos->fti] |= 1; - rpos++; - } - } - - /** - * Function calculates the overall filtering steps complexity per - * scanline. Each complexity unit corresponds to a single multiply-add - * operation. Data copy and pointer math operations are not included in - * this calculation, it is assumed that they correlate to the multiply-add - * operations. Calculation also does not include final rounding, dithering - * and clamping operations since they cannot be optimized out anyway. - * - * Calculation of the CRPosBuf buffer is not included since it cannot be - * avoided. 
- * - * This function should be called after the updateFilterStepBuffers() - * function. - * - * @param Steps Filtering steps array. - * @param Vars Variables object. - * @param UsedFracMap The map of used fractional delay filters. - * @param ScanlineCount Scanline count. - */ - - static int calcComplexity(const CFilterSteps& Steps, - const CImageResizerVars& Vars, - const CBuffer& UsedFracMap, - const int ScanlineCount) { - int fcnum; // Filter complexity multiplier numerator. - int fcdenom; // Filter complexity multiplier denominator. - - if (Vars.packmode != 0) { - fcnum = 1; - fcdenom = 1; - } else { - // In interleaved processing mode, filters require 1 less - // multiplication per 2 multiply-add instructions. - - fcnum = 3; - fcdenom = 4; - } - - int s = 0; // Complexity per one scanline. - int s2 = 0; // Complexity per all scanlines. - int i; - - for (i = 0; i < Steps.getItemCount(); i++) { - const CFilterStep& fs = Steps[i]; - - s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity. - - if (fs.IsUpsample) { - if (fs.FltOrig.getCapacity() > 0) { - continue; - } - - s += (fs.Flt.getCapacity() * (fs.InPrefix + fs.InLen + fs.InSuffix) + - fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity()) * - Vars.ElCount; - } else if (fs.ResampleFactor == 0) { - s += fs.FltBank->getFilterLen() * - (fs.FltBank->getOrder() + Vars.ElCount) * fs.OutLen; - - s2 += fs.FltBank->calcInitComplexity(UsedFracMap); - } else { - s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen * fcnum / fcdenom; - } - } - - return (s + s2 / ScanlineCount); - } - - /** - * @brief Thread-isolated data used for scanline processing. - * - * This structure holds data necessary for image's horizontal or vertical - * scanline processing, including scanline processing queue. - * - * @tparam Tin Source element data type. Intermediate buffers store data - * in floating point format. - * @tparam Tout Destination element data type. Intermediate buffers store - * data in floating point format. 
- */ - - template - class CThreadData : public CImageResizerThreadPool ::CWorkload { - public: - virtual void process() { processScanlineQueue(); } - - /** - * This enumeration lists possible scanline operations. - */ - - enum EScanlineOperation { - sopResizeH, ///< Resize horizontal scanline. - ///< - sopResizeV, ///< Resize vertical scanline. - ///< - sopDitherAndUnpackH, ///< Dither and unpack horizontal scanline. - ///< - sopUnpackH ///< Unpack horizontal scanline. - ///< - }; - - /** - * Function initializes *this thread data object and assigns certain - * variables provided by the higher level code. - * - * @param aThreadIndex Index of this thread data (0-based). - * @param aThreadCount Total number of threads used during processing. - * @param aSteps Filtering steps. - * @param aVars Image resizer variables. - */ - - void init(const int aThreadIndex, const int aThreadCount, - const CFilterSteps& aSteps, const CImageResizerVars& aVars) { - ThreadIndex = aThreadIndex; - ThreadCount = aThreadCount; - Steps = &aSteps; - Vars = &aVars; - } - - /** - * Function initializes scanline processing queue, and updates - * capacities of intermediate buffers. - * - * @param aOp Operation to perform over scanline. - * @param TotalLines The total number of scanlines that will be - * processed by all threads. - * @param aSrcLen Source scanline length in pixels. - * @param aSrcIncr Source scanline buffer increment. Ignored in - * horizontal scanline processing. - * @param aResIncr Resulting scanline buffer increment. Ignored in - * horizontal scanline processing. 
- */ - - void initScanlineQueue(const EScanlineOperation aOp, const int TotalLines, - const int aSrcLen, const int aSrcIncr = 0, - const int aResIncr = 0) { - const int l = Vars->BufLen[0] + Vars->BufLen[1]; - - if (Bufs.getCapacity() < l) { - Bufs.alloc(l, fpclass ::fpalign); - } - - BufPtrs[0] = Bufs + Vars->BufOffs[0]; - BufPtrs[1] = Bufs + Vars->BufLen[0] + Vars->BufOffs[1]; - - int j; - int ml = 0; - - for (j = 0; j < Steps->getItemCount(); j++) { - const CFilterStep& fs = (*Steps)[j]; - - if (fs.ResampleFactor == 0 && ml < fs.FltBank->getFilterLen()) { - ml = fs.FltBank->getFilterLen(); - } - } - - TmpFltBuf.alloc(ml, fpclass ::fpalign); - ScanlineOp = aOp; - SrcLen = aSrcLen; - SrcIncr = aSrcIncr; - ResIncr = aResIncr; - QueueLen = 0; - Queue.increaseCapacity((TotalLines + ThreadCount - 1) / ThreadCount, - false); - } - - /** - * Function adds a scanline to the queue buffer. The - * initScanlineQueue() function should be called before calling this - * function. The number of calls to this add function should not - * exceed the TotalLines spread over all threads. - * - * @param SrcBuf Source scanline buffer. - * @param ResBuf Resulting scanline buffer. - */ - - void addScanlineToQueue(void* const SrcBuf, void* const ResBuf) { - Queue[QueueLen].SrcBuf = SrcBuf; - Queue[QueueLen].ResBuf = ResBuf; - QueueLen++; - } - - /** - * Function processes all queued scanlines. 
- */ - - void processScanlineQueue() { - int i; - - switch (ScanlineOp) { - case sopResizeH: { - for (i = 0; i < QueueLen; i++) { - resizeScanlineH((Tin*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf); - } - - break; - } - - case sopResizeV: { - for (i = 0; i < QueueLen; i++) { - resizeScanlineV((fptype*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf); - } - - break; - } - - case sopDitherAndUnpackH: { - if (Vars->UseSRGBGamma) { - for (i = 0; i < QueueLen; i++) { - CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen, - *Vars); - - Ditherer.dither((fptype*)Queue[i].SrcBuf); - - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } else { - for (i = 0; i < QueueLen; i++) { - Ditherer.dither((fptype*)Queue[i].SrcBuf); - - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } - - break; - } - - case sopUnpackH: { - if (Vars->UseSRGBGamma) { - for (i = 0; i < QueueLen; i++) { - CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen, - *Vars); - - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } else { - for (i = 0; i < QueueLen; i++) { - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } - - break; - } - } - } - - /** - * Function returns ditherer object associated with *this thread data - * object. - */ - - CDitherer& getDitherer() { return (Ditherer); } - - private: - int ThreadIndex; ///< Thread index. - ///< - int ThreadCount; ///< Thread count. - ///< - const CFilterSteps* Steps; ///< Filtering steps. - ///< - const CImageResizerVars* Vars; ///< Image resizer variables. - ///< - CBuffer Bufs; ///< Flip-flop intermediate buffers. - ///< - fptype* BufPtrs[3]; ///< Flip-flop buffer pointers (referenced by - ///< filtering step's InBuf and OutBuf indices). 
- ///< - CBuffer - TmpFltBuf; ///< Temporary buffer used in the - ///< doResize() function, aligned by fpclass :: fpalign. - ///< - EScanlineOperation ScanlineOp; ///< Operation to perform over - ///< scanline. - ///< - int SrcLen; ///< Source scanline length in the last queue. - ///< - int SrcIncr; ///< Source scanline buffer increment in the last queue. - ///< - int ResIncr; ///< Resulting scanline buffer increment in the last - ///< queue. - ///< - CDitherer Ditherer; ///< Ditherer object to use. - ///< - - /** - * @brief Scanline processing queue item. - * - * Scanline processing queue item. - */ - - struct CQueueItem { - void* SrcBuf; ///< Source scanline buffer, will by typecasted to - ///< Tin or fptype*. - ///< - void* ResBuf; ///< Resulting scanline buffer, will by typecasted - ///< to Tout or fptype*. - ///< - }; - - CBuffer Queue; ///< Scanline processing queue. - ///< - int QueueLen; ///< Queue length. - ///< - - /** - * Function resizes a single horizontal scanline. - * - * @param SrcBuf Source scanline buffer. Can be either horizontal or - * vertical. - * @param ResBuf Resulting scanline buffer. - */ - - void resizeScanlineH(const Tin* const SrcBuf, fptype* const ResBuf) { - (*Steps)[0].packScanline(SrcBuf, BufPtrs[0], SrcLen); - BufPtrs[2] = ResBuf; - int j; - - for (j = 0; j < Steps->getItemCount(); j++) { - const CFilterStep& fs = (*Steps)[j]; - fs.prepareInBuf(BufPtrs[fs.InBuf]); - const int DstIncr = (Vars->packmode == 0 ? Vars->ElCount : 1); - - if (fs.ResampleFactor != 0) { - if (fs.IsUpsample) { - fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]); - } else { - fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr); - } - } else { - fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr, - TmpFltBuf); - } - } - } - - /** - * Function resizes a single vertical scanline. - * - * @param SrcBuf Source scanline buffer. Can be either horizontal or - * vertical. - * @param ResBuf Resulting scanline buffer. 
- */ - - void resizeScanlineV(const fptype* const SrcBuf, fptype* const ResBuf) { - (*Steps)[0].convertVtoH(SrcBuf, BufPtrs[0], SrcLen, SrcIncr); - - BufPtrs[2] = ResBuf; - int j; - - for (j = 0; j < Steps->getItemCount(); j++) { - const CFilterStep& fs = (*Steps)[j]; - fs.prepareInBuf(BufPtrs[fs.InBuf]); - const int DstIncr = - (fs.OutBuf == 2 ? ResIncr - : (Vars->packmode == 0 ? Vars->ElCount : 1)); - - if (fs.ResampleFactor != 0) { - if (fs.IsUpsample) { - fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]); - } else { - fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr); - } - } else { - fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr, - TmpFltBuf); - } - } - } - }; -}; - -#undef AVIR_PI -#undef AVIR_PId2 - -} // namespace avir - -#endif // AVIR_CIMAGERESIZER_INCLUDED -//$ nobt -//$ nocpp - -/** - * @file avir.h - * - * @brief The "main" inclusion file with all required classes and functions. - * - * This is the "main" inclusion file for the "AVIR" image resizer. This - * inclusion file contains implementation of the AVIR image resizing algorithm - * in its entirety. Also includes several classes and functions that can be - * useful elsewhere. - * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - * - * @mainpage - * - * @section intro_sec Introduction - * - * Description is available at https://github.com/avaneev/avir - * - * AVIR is devoted to women. Your digital photos can look good at any size! 
- * - * @section license License - * - * AVIR License Agreement - * - * The MIT License (MIT) - * - * Copyright (c) 2015-2019 Aleksey Vaneev - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Please credit the author of this library in your documentation in the - * following way: "AVIR image resizing algorithm designed by Aleksey Vaneev" - * - * @version 2.4 - */ - -#ifndef AVIR_CIMAGERESIZER_INCLUDED -#define AVIR_CIMAGERESIZER_INCLUDED - -#include -#include -#include -#include - -namespace avir { - -/** - * The macro defines AVIR version string. - */ - -#define AVIR_VERSION "2.4" - -/** - * The macro equals to "pi" constant, fills 53-bit floating point mantissa. - * Undefined at the end of file. - */ - -#define AVIR_PI 3.1415926535897932 - -/** - * The macro equals to "pi divided by 2" constant, fills 53-bit floating - * point mantissa. Undefined at the end of file. 
- */ - -#define AVIR_PId2 1.5707963267948966 - -/** - * Rounding function, based on the (int) typecast. Biased result. Not suitable - * for numbers >= 2^31. - * - * @param d Value to round. - * @return Rounded value. Some bias may be introduced. - */ - -template -inline T round(const T d) { - return (d < 0.0 ? -(T)(int)((T)0.5 - d) : (T)(int)(d + (T)0.5)); -} - -/** - * Template function "clamps" (clips) the specified value so that it is not - * lesser than "minv", and not greater than "maxv". - * - * @param Value Value to clamp. - * @param minv Minimal allowed value. - * @param maxv Maximal allowed value. - * @return The clamped value. - */ - -template -inline T clamp(const T& Value, const T minv, const T maxv) { - if (Value < minv) { - return (minv); - } else if (Value > maxv) { - return (maxv); - } else { - return (Value); - } -} - -/** - * Power 2.4 approximation function, designed for sRGB gamma correction. - * - * @param x Argument, in the range 0.09 to 1. - * @return Value raised into power 2.4, approximate. - */ - -template -inline T pow24_sRGB(const T x) { - const double x2 = x * x; - const double x3 = x2 * x; - const double x4 = x2 * x2; - - return ((T)(0.0985766365536824 + 0.839474952656502 * x2 + - 0.363287814061725 * x3 - - 0.0125559718896615 / (0.12758338921578 + 0.290283465468235 * x) - - 0.231757513261358 * x - 0.0395365717969074 * x4)); -} - -/** - * Power 1/2.4 approximation function, designed for sRGB gamma correction. - * - * @param x Argument, in the range 0.003 to 1. - * @return Value raised into power 1/2.4, approximate. - */ - -template -inline T pow24i_sRGB(const T x) { - const double sx = sqrt(x); - const double ssx = sqrt(sx); - const double sssx = sqrt(ssx); - - return ((T)(0.000213364515060263 + 0.0149409239419218 * x + - 0.433973412731747 * sx + - ssx * (0.659628181609715 * sssx - 0.0380957908841466 - - 0.0706476137208521 * sx))); -} - -/** - * Function approximately linearizes the sRGB gamma value. 
- * - * @param s sRGB gamma value, in the range 0 to 1. - * @return Linearized sRGB gamma value, approximated. - */ - -template -inline T convertSRGB2Lin(const T s) { - const T a = (T)0.055; - - if (s <= (T)0.04045) { - return (s / (T)12.92); - } - - return (pow24_sRGB((s + a) / ((T)1 + a))); -} - -/** - * Function approximately de-linearizes the linear gamma value. - * - * @param s Linear gamma value, in the range 0 to 1. - * @return sRGB gamma value, approximated. - */ - -template -inline T convertLin2SRGB(const T s) { - const T a = (T)0.055; - - if (s <= (T)0.0031308) { - return ((T)12.92 * s); - } - - return (((T)1 + a) * pow24i_sRGB(s) - a); -} - -/** - * Function converts (via typecast) specified array of type T1 values of - * length l into array of type T2 values. If T1 is the same as T2, copy - * operation is performed. When copying data at overlapping address spaces, - * "op" should be lower than "ip". - * - * @param ip Input buffer. - * @param[out] op Output buffer. - * @param l The number of elements to copy. - * @param ip Input buffer pointer increment. - * @param op Output buffer pointer increment. - */ - -template -inline void copyArray(const T1* ip, T2* op, int l, const int ipinc = 1, - const int opinc = 1) { - while (l > 0) { - *op = (T2)*ip; - op += opinc; - ip += ipinc; - l--; - } -} - -/** - * Function adds values located in array "ip" to array "op". - * - * @param ip Input buffer. - * @param[out] op Output buffer. - * @param l The number of elements to add. - * @param ip Input buffer pointer increment. - * @param op Output buffer pointer increment. - */ - -template -inline void addArray(const T1* ip, T2* op, int l, const int ipinc = 1, - const int opinc = 1) { - while (l > 0) { - *op += *ip; - op += opinc; - ip += ipinc; - l--; - } -} - -/** - * Function that replicates a set of adjacent elements several times in a row. - * This operation is usually used to replicate pixels at the start or end of - * image's scanline. 
- * - * @param ip Source array. - * @param ipl Source array length (usually 1..4, but can be any number). - * @param[out] op Destination buffer. - * @param l Number of times the source array should be replicated (the - * destination buffer should be able to hold ipl * l number of elements). - * @param opinc Destination buffer position increment after replicating the - * source array. This value should be equal to at least ipl. - */ - -template -inline void replicateArray(const T1* const ip, const int ipl, T2* op, int l, - const int opinc) { - if (ipl == 1) { - while (l > 0) { - op[0] = ip[0]; - op += opinc; - l--; - } - } else if (ipl == 4) { - while (l > 0) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - op[3] = ip[3]; - op += opinc; - l--; - } - } else if (ipl == 3) { - while (l > 0) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - op += opinc; - l--; - } - } else if (ipl == 2) { - while (l > 0) { - op[0] = ip[0]; - op[1] = ip[1]; - op += opinc; - l--; - } - } else { - while (l > 0) { - int i; - - for (i = 0; i < ipl; i++) { - op[i] = ip[i]; - } - - op += opinc; - l--; - } - } -} - -/** - * Function calculates frequency response of the specified FIR filter at the - * specified circular frequency. Phase can be calculated as atan2( im, re ). - * Function uses computationally-efficient oscillators instead of "cos" and - * "sin" functions. - * - * @param flt FIR filter's coefficients. - * @param fltlen Number of coefficients (taps) in the filter. - * @param th Circular frequency [0; pi]. - * @param[out] re0 Resulting real part of the complex frequency response. - * @param[out] im0 Resulting imaginary part of the complex frequency response. - * @param fltlat Filter's latency in samples (taps). 
- */ - -template -inline void calcFIRFilterResponse(const T* flt, int fltlen, const double th, - double& re0, double& im0, - const int fltlat = 0) { - const double sincr = 2.0 * cos(th); - double cvalue1; - double svalue1; - - if (fltlat == 0) { - cvalue1 = 1.0; - svalue1 = 0.0; - } else { - cvalue1 = cos(-fltlat * th); - svalue1 = sin(-fltlat * th); - } - - double cvalue2 = cos(-(fltlat + 1) * th); - double svalue2 = sin(-(fltlat + 1) * th); - - double re = 0.0; - double im = 0.0; - - while (fltlen > 0) { - re += cvalue1 * flt[0]; - im += svalue1 * flt[0]; - flt++; - fltlen--; - - double tmp = cvalue1; - cvalue1 = sincr * cvalue1 - cvalue2; - cvalue2 = tmp; - - tmp = svalue1; - svalue1 = sincr * svalue1 - svalue2; - svalue2 = tmp; - } - - re0 = re; - im0 = im; -} - -/** - * Function normalizes FIR filter so that its frequency response at DC is - * equal to DCGain. - * - * @param[in,out] p Filter coefficients. - * @param l Filter length. - * @param DCGain Filter's gain at DC. - * @param pstep "p" array step. - */ - -template -inline void normalizeFIRFilter(T* const p, const int l, const double DCGain, - const int pstep = 1) { - double s = 0.0; - T* pp = p; - int i = l; - - while (i > 0) { - s += *pp; - pp += pstep; - i--; - } - - s = DCGain / s; - pp = p; - i = l; - - while (i > 0) { - *pp = (T)(*pp * s); - pp += pstep; - i--; - } -} - -/** - * @brief Memory buffer class for element array storage, with capacity - * tracking. - * - * Allows easier handling of memory blocks allocation and automatic - * deallocation for arrays (buffers) consisting of elements of specified - * class. Tracks buffer's capacity in "int" variable; unsuitable for - * allocation of very large memory blocks (with more than 2 billion elements). - * - * This class manages memory space only - it does not perform element class - * construction (initialization) operations. Buffer's required memory address - * alignment specification is supported. 
- * - * Uses standard library to allocate and deallocate memory. - * - * @tparam T Buffer element's type. - * @tparam capint Buffer capacity's type to use. Use size_t for large buffers. - */ - -template -class CBuffer { - public: - CBuffer() : Data(NULL), DataAligned(NULL), Capacity(0), Alignment(0) {} - - /** - * Constructor creates the buffer with the specified capacity. - * - * @param aCapacity Buffer's capacity. - * @param aAlignment Buffer's required memory address alignment. 0 - use - * stdlib's default alignment. - */ - - CBuffer(const capint aCapacity, const int aAlignment = 0) { - allocinit(aCapacity, aAlignment); - } - - CBuffer(const CBuffer& Source) { - allocinit(Source.Capacity, Source.Alignment); - memcpy(DataAligned, Source.DataAligned, Capacity * sizeof(T)); - } - - ~CBuffer() { freeData(); } - - CBuffer& operator=(const CBuffer& Source) { - alloc(Source.Capacity, Source.Alignment); - memcpy(DataAligned, Source.DataAligned, Capacity * sizeof(T)); - return (*this); - } - - /** - * Function allocates memory so that the specified number of elements - * can be stored in *this buffer object. - * - * @param aCapacity Storage for this number of elements to allocate. - * @param aAlignment Buffer's required memory address alignment, - * power-of-2 values only. 0 - use stdlib's default alignment. - */ - - void alloc(const capint aCapacity, const int aAlignment = 0) { - freeData(); - allocinit(aCapacity, aAlignment); - } - - /** - * Function deallocates any previously allocated buffer. - */ - - void free() { - freeData(); - Data = NULL; - DataAligned = NULL; - Capacity = 0; - Alignment = 0; - } - - /** - * @return The capacity of the element buffer. - */ - - capint getCapacity() const { return (Capacity); } - - /** - * Function "forces" *this buffer to have an arbitary capacity. Calling - * this function invalidates all further operations except deleting *this - * object. This function should not be usually used at all. 
Function can - * be used to "model" certain buffer capacity without calling a costly - * memory allocation function. - * - * @param NewCapacity A new "forced" capacity. - */ - - void forceCapacity(const capint NewCapacity) { Capacity = NewCapacity; } - - /** - * Function reallocates *this buffer to a larger size so that it will be - * able to hold the specified number of elements. Downsizing is not - * performed. Alignment is not changed. - * - * @param NewCapacity New (increased) capacity. - * @param DoDataCopy "True" if data in the buffer should be retained. - */ - - void increaseCapacity(const capint NewCapacity, - const bool DoDataCopy = true) { - if (NewCapacity < Capacity) { - return; - } - - if (DoDataCopy) { - const capint PrevCapacity = Capacity; - T* const PrevData = Data; - T* const PrevDataAligned = DataAligned; - - allocinit(NewCapacity, Alignment); - memcpy(DataAligned, PrevDataAligned, PrevCapacity * sizeof(T)); - - ::free(PrevData); - } else { - ::free(Data); - allocinit(NewCapacity, Alignment); - } - } - - /** - * Function "truncates" (reduces) capacity of the buffer without - * reallocating it. Alignment is not changed. - * - * @param NewCapacity New required capacity. - */ - - void truncateCapacity(const capint NewCapacity) { - if (NewCapacity >= Capacity) { - return; - } - - Capacity = NewCapacity; - } - - /** - * Function increases capacity so that the specified number of - * elements can be stored. This function increases the previous capacity - * value by third the current capacity value until space for the required - * number of elements is available. Alignment is not changed. - * - * @param ReqCapacity Required capacity. 
- */ - - void updateCapacity(const capint ReqCapacity) { - if (ReqCapacity <= Capacity) { - return; - } - - capint NewCapacity = Capacity; - - while (NewCapacity < ReqCapacity) { - NewCapacity += NewCapacity / 3 + 1; - } - - increaseCapacity(NewCapacity); - } - - operator T*() const { return (DataAligned); } - - private: - T* Data; ///< Element buffer pointer. - ///< - T* DataAligned; ///< Memory address-aligned element buffer pointer. - ///< - capint Capacity; ///< Element buffer capacity. - ///< - int Alignment; ///< Memory address alignment in use. 0 - use stdlib's - ///< default alignment. - ///< - - /** - * Internal element buffer allocation function used during object - * construction. - * - * @param aCapacity Storage for this number of elements to allocate. - * @param aAlignment Buffer's required memory address alignment. 0 - use - * stdlib's default alignment. - */ - - void allocinit(const capint aCapacity, const int aAlignment) { - if (aAlignment == 0) { - Data = (T*)::malloc(aCapacity * sizeof(T)); - DataAligned = Data; - Alignment = 0; - } else { - Data = (T*)::malloc(aCapacity * sizeof(T) + aAlignment); - DataAligned = alignptr(Data, aAlignment); - Alignment = aAlignment; - } - - Capacity = aCapacity; - } - - /** - * Function frees a previously allocated Data buffer. - */ - - void freeData() { ::free(Data); } - - /** - * Function modifies the specified pointer so that it becomes memory - * address-aligned. - * - * @param ptr Pointer to align. - * @param align Alignment in bytes to apply. - * @return Pointer aligned to align bytes. Works with power-of-2 - * alignments only. If no alignment is necessary, "align" bytes will be - * added to the pointer value. 
- */ - - template - inline Tp alignptr(const Tp ptr, const uintptr_t align) { - return ((Tp)((uintptr_t)ptr + align - ((uintptr_t)ptr & (align - 1)))); - } -}; - -/** - * Function optimizes the length of the symmetric-odd FIR filter by removing - * left- and rightmost elements that are below specific threshold. - * - * Synthetic test shows that filter gets optimized in 2..3% of cases and in - * each such case optimization reduces filter length by 6..8%. Optimization, - * however, may skew the results of algorithm modeling and complexity - * calculation leading to a choice of a less optimal algorithm. - * - * @param[in,out] Flt Buffer that contains filter being optimized. - * @param[in,out] FltLatency Variable that holds the current latency of the - * filter. May be adjusted on function return. - * @param Threshold Threshold level. - */ - -template -inline void optimizeFIRFilter(CBuffer& Flt, int& FltLatency, - T const Threshold = (T)0.00001) { - int i; - - // Optimize length. - - for (i = 0; i <= FltLatency; i++) { - if (fabs(Flt[i]) >= Threshold || i == FltLatency) { - if (i > 0) { - const int NewCapacity = Flt.getCapacity() - i * 2; - copyArray(&Flt[i], &Flt[0], NewCapacity); - Flt.truncateCapacity(NewCapacity); - FltLatency -= i; - } - - break; - } - } -} - -/** - * @brief Array of structured objects. - * - * Implements allocation of a linear array of objects of class T (which are - * initialized), addressable via operator[]. Each object is created via the - * "operator new". New object insertions are quick since implementation uses - * prior space allocation (capacity), thus not requiring frequent memory block - * reallocations. - * - * @tparam T Array element's type. 
- */ - -template -class CStructArray { - public: - CStructArray() : ItemCount(0) {} - - CStructArray(const CStructArray& Source) - : ItemCount(0), Items(Source.getItemCount()) { - while (ItemCount < Source.getItemCount()) { - Items[ItemCount] = new T(Source[ItemCount]); - ItemCount++; - } - } - - ~CStructArray() { clear(); } - - CStructArray& operator=(const CStructArray& Source) { - clear(); - - const int NewCount = Source.ItemCount; - Items.updateCapacity(NewCount); - - while (ItemCount < NewCount) { - Items[ItemCount] = new T(Source[ItemCount]); - ItemCount++; - } - - return (*this); - } - - T& operator[](const int Index) { return (*Items[Index]); } - - const T& operator[](const int Index) const { return (*Items[Index]); } - - /** - * Function creates a new object of type T with the default constructor - * and adds this object to the array. - * - * @return Reference to a newly added object. - */ - - T& add() { - if (ItemCount == Items.getCapacity()) { - Items.increaseCapacity(ItemCount * 3 / 2 + 1); - } - - Items[ItemCount] = new T(); - ItemCount++; - - return ((*this)[ItemCount - 1]); - } - - /** - * Function changes number of allocated items. New items are created with - * the default constructor. If NewCount is below the current item count, - * items that are above NewCount range will be destructed. - * - * @param NewCount New requested item count. - */ - - void setItemCount(const int NewCount) { - if (NewCount > ItemCount) { - Items.increaseCapacity(NewCount); - - while (ItemCount < NewCount) { - Items[ItemCount] = new T(); - ItemCount++; - } - } else { - while (ItemCount > NewCount) { - ItemCount--; - delete Items[ItemCount]; - } - } - } - - /** - * Function erases all items of *this array. - */ - - void clear() { - while (ItemCount > 0) { - ItemCount--; - delete Items[ItemCount]; - } - } - - /** - * @return The number of allocated items. 
- */ - - int getItemCount() const { return (ItemCount); } - - private: - int ItemCount; ///< The number of items available in the array. - ///< - CBuffer Items; ///< Element buffer. - ///< -}; - -/** - * @brief Sine signal generator class. - * - * Class implements sine signal generator without biasing, with - * constructor-based initalization only. This generator uses oscillator - * instead of "sin" function. - */ - -class CSineGen { - public: - /** - * Constructor initializes *this sine signal generator. - * - * @param si Sine function increment, in radians. - * @param ph Starting phase, in radians. Add 0.5 * AVIR_PI for cosine - * function. - */ - - CSineGen(const double si, const double ph) - : svalue1(sin(ph)), svalue2(sin(ph - si)), sincr(2.0 * cos(si)) {} - - /** - * @return The next value of the sine function, without biasing. - */ - - double generate() { - const double res = svalue1; - - svalue1 = sincr * res - svalue2; - svalue2 = res; - - return (res); - } - - private: - double svalue1; ///< Current sine value. - ///< - double svalue2; ///< Previous sine value. - ///< - double sincr; ///< Sine value increment. - ///< -}; - -/** - * @brief Peaked Cosine window function generator class. - * - * Class implements Peaked Cosine window function generator. Generates the - * right-handed half of the window function. The Alpha parameter of this - * window function offers the control of the balance between the early and - * later taps of the filter. E.g. at Alpha=1 both early and later taps are - * attenuated, but at Alpha=4 mostly later taps are attenuated. This offers a - * great control over ringing artifacts produced by a low-pass filter in image - * processing, without compromising achieved image sharpness. - */ - -class CDSPWindowGenPeakedCosine { - public: - /** - * Constructor initializes *this window function generator. - * - * @param aAlpha Alpha parameter, affects the peak shape (peak - * augmentation) of the window function. Should be >= 1.0. 
- * @param aLen2 Half filter's length (non-truncated). - */ - - CDSPWindowGenPeakedCosine(const double aAlpha, const double aLen2) - : Alpha(aAlpha), - Len2(aLen2), - wn(0), - w1(AVIR_PId2 / Len2, AVIR_PI * 0.5) {} - - /** - * @return The next Peaked Cosine window function coefficient. - */ - - double generate() { - const double h = pow(wn / Len2, Alpha); - wn++; - - return (w1.generate() * (1.0 - h)); - } - - private: - double Alpha; ///< Alpha parameter, affects the peak shape of window. - ///< - double Len2; ///< Half length of the window function. - ///< - int wn; ///< Window function integer position. 0 - center of the - ///< window function. - ///< - CSineGen w1; ///< Sine-wave generator. - ///< -}; - -/** - * @brief FIR filter-based equalizer generator. - * - * Class implements an object used to generate symmetric-odd FIR filters with - * the specified frequency response (aka paragraphic equalizer). The - * calculated filter is windowed by the Peaked Cosine window function. - * - * In image processing, due to short length of filters being used (6-8 taps) - * the resulting frequency response of the filter is approximate and may be - * mathematically imperfect, but still adequate to the visual requirements. - * - * On a side note, this equalizer generator can be successfully used for audio - * signal equalization as well: for example, it is used in almost the same - * form in Voxengo Marvel GEQ equalizer plug-in. - * - * Filter generation is based on decomposition of frequency range into - * spectral bands, with each band represented by linear and ramp "kernels". - * When the filter is built, these kernels are combined together with - * different weights that approximate the required frequency response. - */ - -class CDSPFIREQ { - public: - /** - * Function initializes *this object with the required parameters. The - * gain of frequencies beyond the MinFreq..MaxFreq range are controlled by - * the first and the last band's gain. 
- * - * @param SampleRate Processing sample rate (use 2 for image processing). - * @param aFilterLength Required filter length in samples (taps). The - * actual filter length is truncated to an integer value. - * @param aBandCount Number of band crossover points required to control, - * including bands at MinFreq and MaxFreq. - * @param MinFreq Minimal frequency that should be controlled. - * @param MaxFreq Maximal frequency that should be controlled. - * @param IsLogBands "True" if the bands should be spaced logarithmically. - * @param WFAlpha Peaked Cosine window function's Alpha parameter. - */ - - void init(const double SampleRate, const double aFilterLength, - const int aBandCount, const double MinFreq, const double MaxFreq, - const bool IsLogBands, const double WFAlpha) { - FilterLength = aFilterLength; - BandCount = aBandCount; - - CenterFreqs.alloc(BandCount); - - z = (int)ceil(FilterLength * 0.5); - zi = z + (z & 1); - z2 = z * 2; - - CBuffer oscbuf(z2); - initOscBuf(oscbuf); - - CBuffer winbuf(z); - initWinBuf(winbuf, WFAlpha); - - UseFirstVirtBand = (MinFreq > 0.0); - const int k = zi * (BandCount + (UseFirstVirtBand ? 1 : 0)); - Kernels1.alloc(k); - Kernels2.alloc(k); - - double m; // Frequency step multiplier. - double mo; // Frequency step offset (addition). 
- - if (IsLogBands) { - m = exp(log(MaxFreq / MinFreq) / (BandCount - 1)); - mo = 0.0; - } else { - m = 1.0; - mo = (MaxFreq - MinFreq) / (BandCount - 1); - } - - double f = MinFreq; - double x1 = 0.0; - double x2; - int si; - - if (UseFirstVirtBand) { - si = 0; - } else { - si = 1; - CenterFreqs[0] = 0.0; - f = f * m + mo; - } - - double* kernbuf1 = &Kernels1[0]; - double* kernbuf2 = &Kernels2[0]; - int i; - - for (i = si; i < BandCount; i++) { - x2 = f * 2.0 / SampleRate; - CenterFreqs[i] = x2; - - fillBandKernel(x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - f = f * m + mo; - } - - if (x1 < 1.0) { - UseLastVirtBand = true; - fillBandKernel(x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf); - } else { - UseLastVirtBand = false; - } - } - - /** - * @return Filter's length, in samples (taps). - */ - - int getFilterLength() const { return (z2 - 1); } - - /** - * @return Filter's latency (group delay), in samples (taps). - */ - - int getFilterLatency() const { return (z - 1); } - - /** - * Function creates symmetric-odd FIR filter with the specified gain - * levels at band crossover points. - * - * @param BandGains Array of linear gain levels, count=BandCount specified - * in the init() function. - * @param[out] Filter Output filter buffer, length = getFilterLength(). 
- */ - - void buildFilter(const double* const BandGains, double* const Filter) { - const double* kernbuf1 = &Kernels1[0]; - const double* kernbuf2 = &Kernels2[0]; - double x1 = 0.0; - double y1 = BandGains[0]; - double x2; - double y2; - - int i; - int si; - - if (UseFirstVirtBand) { - si = 1; - x2 = CenterFreqs[0]; - y2 = y1; - } else { - si = 2; - x2 = CenterFreqs[1]; - y2 = BandGains[1]; - } - - copyBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - y1 = y2; - - for (i = si; i < BandCount; i++) { - x2 = CenterFreqs[i]; - y2 = BandGains[i]; - - addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - y1 = y2; - } - - if (UseLastVirtBand) { - addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - y1); - } - - for (i = 0; i < z - 1; i++) { - Filter[z + i] = Filter[z - 2 - i]; - } - } - - /** - * Function calculates filter's length (in samples) and latency depending - * on the required non-truncated filter length. - * - * @param aFilterLength Required filter length in samples (non-truncated). - * @param[out] Latency Resulting latency (group delay) of the filter, - * in samples (taps). - * @return Filter length in samples (taps). - */ - - static int calcFilterLength(const double aFilterLength, int& Latency) { - const int l = (int)ceil(aFilterLength * 0.5); - Latency = l - 1; - - return (l * 2 - 1); - } - - private: - double FilterLength; ///< Length of filter. - ///< - int z; ///< Equals (int) ceil( FilterLength * 0.5 ). - ///< - int zi; ///< Equals "z" if z is even, or z + 1 if z is odd. Used as a - ///< Kernels1 and Kernels2 size multiplier and kernel buffer - ///< increment to make sure each kernel buffer is 16-byte aligned. - ///< - int z2; ///< Equals z * 2. - ///< - int BandCount; ///< Number of controllable bands. - ///< - CBuffer CenterFreqs; ///< Center frequencies for all bands, - ///< normalized to 0.0-1.0 range. 
- ///< - CBuffer Kernels1; ///< Half-length kernel buffers for each - ///< spectral band (linear part). - ///< - CBuffer Kernels2; ///< Half-length kernel buffers for each - ///< spectral band (ramp part). - ///< - bool UseFirstVirtBand; ///< "True" if the first virtual band - ///< (between 0.0 and MinFreq) should be used. The - ///< first virtual band won't be used if MinFreq - ///< equals 0.0. - ///< - bool UseLastVirtBand; ///< "True" if the last virtual band (between - ///< MaxFreq and SampleRate * 0.5) should be used. The - ///< last virtual band won't be used if MaxFreq * 2.0 - ///< equals SampleRate. - ///< - - /** - * Function initializes the "oscbuf" used in the fillBandKernel() - * function. - * - * @param oscbuf Oscillator buffer, length = z * 2. - */ - - void initOscBuf(double* oscbuf) const { - int i = z; - - while (i > 0) { - oscbuf[0] = 0.0; - oscbuf[1] = 1.0; - oscbuf += 2; - i--; - } - } - - /** - * Function initializes window function buffer. This function generates - * Peaked Cosine window function. - * - * @param winbuf Windowing buffer. - * @param Alpha Peaked Cosine alpha parameter. - */ - - void initWinBuf(double* winbuf, const double Alpha) const { - CDSPWindowGenPeakedCosine wf(Alpha, FilterLength * 0.5); - int i; - - for (i = 1; i <= z; i++) { - winbuf[z - i] = wf.generate(); - } - } - - /** - * Function fills first half of symmetric-odd FIR kernel for the band. - * This function should be called successively for adjacent bands. - * Previous band's x2 should be equal to current band's x1. A band kernel - * consists of 2 elements: linear kernel and ramp kernel. - * - * @param x1 Band's left corner frequency (0..1). - * @param x2 Band's right corner frequency (0..1). - * @param kernbuf1 Band kernel buffer 1 (linear part), length = z. - * @param kernbuf2 Band kernel buffer 2 (ramp part), length = z. - * @param oscbuf Oscillation buffer. 
Before the first call of the - * fillBandKernel() should be initialized with the call of the - * initOscBuf() function. - * @param winbuf Buffer that contains windowing function. - */ - - void fillBandKernel(const double x1, const double x2, double* kernbuf1, - double* kernbuf2, double* oscbuf, - const double* const winbuf) { - const double s2_incr = AVIR_PI * x2; - const double s2_coeff = 2.0 * cos(s2_incr); - - double s2_value1 = sin(s2_incr * (-z + 1)); - double c2_value1 = sin(s2_incr * (-z + 1) + AVIR_PI * 0.5); - oscbuf[0] = sin(s2_incr * -z); - oscbuf[1] = sin(s2_incr * -z + AVIR_PI * 0.5); - - int ks; - - for (ks = 1; ks < z; ks++) { - const int ks2 = ks * 2; - const double s1_value1 = oscbuf[ks2]; - const double c1_value1 = oscbuf[ks2 + 1]; - oscbuf[ks2] = s2_value1; - oscbuf[ks2 + 1] = c2_value1; - - const double x = AVIR_PI * (ks - z); - const double v0 = winbuf[ks - 1] / ((x1 - x2) * x); - - kernbuf1[ks - 1] = - (x2 * s2_value1 - x1 * s1_value1 + (c2_value1 - c1_value1) / x) * v0; - - kernbuf2[ks - 1] = (s2_value1 - s1_value1) * v0; - - s2_value1 = s2_coeff * s2_value1 - oscbuf[ks2 - 2]; - c2_value1 = s2_coeff * c2_value1 - oscbuf[ks2 - 1]; - } - - kernbuf1[z - 1] = (x2 * x2 - x1 * x1) / (x1 - x2) * 0.5; - kernbuf2[z - 1] = -1.0; - } - - /** - * Function copies band kernel's elements to the output buffer. - * - * @param outbuf Output buffer. - * @param kernbuf1 Kernel buffer 1 (linear part). - * @param kernbuf2 Kernel buffer 2 (ramp part). - * @param c Multiplier for linear kernel element. - * @param d Multiplier for ramp kernel element. - */ - - void copyBandKernel(double* outbuf, const double* const kernbuf1, - const double* const kernbuf2, const double c, - const double d) const { - int ks; - - for (ks = 0; ks < z; ks++) { - outbuf[ks] = c * kernbuf1[ks] + d * kernbuf2[ks]; - } - } - - /** - * Function adds band kernel's elements to the output buffer. - * - * @param outbuf Output buffer. - * @param kernbuf1 Kernel buffer 1 (linear part). 
- * @param kernbuf2 Kernel buffer 2 (ramp part). - * @param c Multiplier for linear kernel element. - * @param d Multiplier for ramp kernel element. - */ - - void addBandKernel(double* outbuf, const double* const kernbuf1, - const double* const kernbuf2, const double c, - const double d) const { - int ks; - - for (ks = 0; ks < z; ks++) { - outbuf[ks] += c * kernbuf1[ks] + d * kernbuf2[ks]; - } - } -}; - -/** - * @brief Low-pass filter windowed by Peaked Cosine window function. - * - * This class implements calculation of linear-phase symmetric-odd FIR - * low-pass filter windowed by the Peaked Cosine window function, for image - * processing applications. - */ - -class CDSPPeakedCosineLPF { - public: - int fl2; ///< Half filter's length, excluding the peak value. This value - ///< can be also used as filter's latency in samples (taps). - ///< - int FilterLen; ///< Filter's length in samples (taps). - ///< - - /** - * Constructor initalizes *this object. - * - * @param aLen2 Half-length (non-truncated) of low-pass filter, in samples - * (taps). - * @param aFreq2 Low-pass filter's corner frequency [0; pi]. - * @param aAlpha Peaked Cosine window function Alpha parameter. - */ - - CDSPPeakedCosineLPF(const double aLen2, const double aFreq2, - const double aAlpha) - : fl2((int)ceil(aLen2) - 1), - FilterLen(fl2 + fl2 + 1), - Len2(aLen2), - Freq2(aFreq2), - Alpha(aAlpha) {} - - /** - * Function generates a linear-phase low-pass filter windowed by Peaked - * Cosine window function. - * - * @param[out] op Output buffer, length = FilterLen (fl2 * 2 + 1). - * @param DCGain Required gain at DC. The resulting filter will be - * normalized to achieve this DC gain. 
- */ - - template - void generateLPF(T* op, const double DCGain) { - CDSPWindowGenPeakedCosine wf(Alpha, Len2); - CSineGen f2(Freq2, 0.0); - - op += fl2; - T* op2 = op; - f2.generate(); - int t = 1; - - *op = (T)(Freq2 * wf.generate() / AVIR_PI); - double s = *op; - - while (t <= fl2) { - const double v = f2.generate() * wf.generate() / t / AVIR_PI; - op++; - op2--; - *op = (T)v; - *op2 = (T)v; - s += *op + *op2; - t++; - } - - t = FilterLen; - s = DCGain / s; - - while (t > 0) { - *op2 = (T)(*op2 * s); - op2++; - t--; - } - } - - private: - double Len2; ///< Half-length (non-truncated) of low-pass filter, in - ///< samples (taps). - ///< - double Freq2; ///< Low-pass filter's corner frequency. - ///< - double Alpha; ///< Peaked Cosine window function Alpha parameter. - ///< -}; - -/** - * @brief Buffer class for parametrized low-pass filter. - * - * This class extends the CBuffer< double > class by adding several variables - * that define a symmetric-odd FIR low-pass filter windowed by Peaked Cosine - * window function. This class can be used to compare filters without - * comparing their buffer contents. - */ - -class CFltBuffer : public CBuffer { - public: - double Len2; ///< Half-length (non-truncated) of low-pass filters, in - ///< samples (taps). - ///< - double Freq; ///< Low-pass filter's corner frequency. - ///< - double Alpha; ///< Peaked Cosine window function Alpha parameter. - ///< - double DCGain; ///< DC gain applied to the filter. - ///< - - CFltBuffer() - : CBuffer(), Len2(0.0), Freq(0.0), Alpha(0.0), DCGain(0.0) {} - - /** - * @param b2 Filter buffer to compare *this object to. - * @return Operator returns "true" if both filters have same parameters. - */ - - bool operator==(const CFltBuffer& b2) const { - return (Len2 == b2.Len2 && Freq == b2.Freq && Alpha == b2.Alpha && - DCGain == b2.DCGain); - } -}; - -/** - * @brief Sinc function-based fractional delay filter bank. 
- * - * Class implements storage and initialization of a bank of sinc - * function-based fractional delay filters, expressed as 1st order polynomial - * interpolation coefficients. The filters are produced from a single "long" - * windowed low-pass filter. Also supports 0th-order ("nearest neighbor") - * interpolation. - * - * This class also supports multiplication of each fractional delay filter by - * an external filter (usually a low-pass filter). - * - * @tparam fptype Specifies storage type of the filter coefficients bank. The - * filters are initially calculated using the "double" precision. - */ - -template -class CDSPFracFilterBankLin { - public: - CDSPFracFilterBankLin() : Order(-1) {} - - /** - * Copy constructor copies a limited set of parameters of the source - * filter bank. The actual filters are not copied. Such copying is used - * during filtering steps "modeling" stage. A further init() function - * call is required. - * - * @param s Source filter bank. - */ - - void copyInitParams(const CDSPFracFilterBankLin& s) { - WFLen2 = s.WFLen2; - WFFreq = s.WFFreq; - WFAlpha = s.WFAlpha; - FracCount = s.FracCount; - Order = s.Order; - Alignment = s.Alignment; - SrcFilterLen = s.SrcFilterLen; - FilterLen = s.FilterLen; - FilterSize = s.FilterSize; - IsSrcTableBuilt = false; - ExtFilter = s.ExtFilter; - TableFillFlags.alloc(s.TableFillFlags.getCapacity()); - int i; - - // Copy table fill flags, but shifted so that further initialization - // is still possible (such feature should not be used, though). - - for (i = 0; i < TableFillFlags.getCapacity(); i++) { - TableFillFlags[i] = (uint8_t)(s.TableFillFlags[i] << 2); - } - } - - /** - * Operator compares *this filter bank and another filter bank and returns - * "true" if their parameters are equal. Alignment is not taken into - * account. - * - * @param s Filter bank to compare to. - * @return "True" if compared banks have equal parameters. 
- */ - - bool operator==(const CDSPFracFilterBankLin& s) const { - return (Order == s.Order && WFLen2 == s.WFLen2 && WFFreq == s.WFFreq && - WFAlpha == s.WFAlpha && FracCount == s.FracCount && - ExtFilter == s.ExtFilter); - } - - /** - * Function initializes (builds) the filter bank based on the supplied - * parameters. If the supplied parameters are equal to previously defined - * parameters, function does nothing (alignment is assumed to be never - * changing between the init() function calls). - * - * @param ReqFracCount Required number of fractional delays in the filter - * bank. The minimal value is 2. - * @param ReqOrder Required order of the interpolation polynomial - * (0 or 1). - * @param BaseLen Low-pass filter's base length, in samples (taps). - * Affects the actual length of the filter and its overall steepness. - * @param Cutoff Low-pass filter's normalized cutoff frequency [0; 1]. - * @param aWFAlpha Peaked Cosine window function's Alpha parameter. - * @param aExtFilter External filter to apply to each fractional delay - * filter. - * @param aAlignment Memory alignment of the filter bank, power-of-2 - * value. 0 - use default stdlib alignment. - * @param FltLenAlign Filter's length alignment, power-of-2 value. 
- */ - - void init(const int ReqFracCount, const int ReqOrder, const double BaseLen, - const double Cutoff, const double aWFAlpha, - const CFltBuffer& aExtFilter, const int aAlignment = 0, - const int FltLenAlign = 1) { - double NewWFLen2 = 0.5 * BaseLen * ReqFracCount; - double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount; - double NewWFAlpha = aWFAlpha; - - if (ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq && - NewWFAlpha == WFAlpha && ReqFracCount == FracCount && - aExtFilter == ExtFilter) { - IsInitRequired = false; - return; - } - - WFLen2 = NewWFLen2; - WFFreq = NewWFFreq; - WFAlpha = NewWFAlpha; - FracCount = ReqFracCount; - Order = ReqOrder; - Alignment = aAlignment; - ExtFilter = aExtFilter; - - CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha); - SrcFilterLen = (p.fl2 / ReqFracCount + 1) * 2; - - const int ElementSize = ReqOrder + 1; - FilterLen = SrcFilterLen; - - if (ExtFilter.getCapacity() > 0) { - FilterLen += ExtFilter.getCapacity() - 1; - } - - FilterLen = (FilterLen + FltLenAlign - 1) & ~(FltLenAlign - 1); - FilterSize = FilterLen * ElementSize; - IsSrcTableBuilt = false; - IsInitRequired = true; - } - - /** - * @return The length of each fractional delay filter, in samples (taps). - * Always an even value. - */ - - int getFilterLen() const { return (FilterLen); } - - /** - * @return The number of fractional filters in use by *this bank. - */ - - int getFracCount() const { return (FracCount); } - - /** - * @return The order of the interpolation polynomial. - */ - - int getOrder() const { return (Order); } - - /** - * Function returns the pointer to the specified interpolation table - * filter. - * - * @param i Filter (fractional delay) index, in the range 0 to - * ReqFracCount - 1, inclusive. - * @return Pointer to filter. Higher order polynomial coefficients are - * stored after after previous order coefficients, separated by FilterLen - * elements. 
- */ - - const fptype* getFilter(const int i) { - if (!IsSrcTableBuilt) { - buildSrcTable(); - } - - fptype* const Res = &Table[i * FilterSize]; - - if ((TableFillFlags[i] & 2) == 0) { - createFilter(i); - TableFillFlags[i] |= 2; - - if (Order > 0) { - createFilter(i + 1); - const fptype* const Res2 = Res + FilterSize; - fptype* const op = Res + FilterLen; - int j; - - // Create higher-order interpolation coefficients (linear - // interpolation). - - for (j = 0; j < FilterLen; j++) { - op[j] = Res2[j] - Res[j]; - } - } - } - - return (Res); - } - - /** - * Function makes sure all fractional delay filters were created. - */ - - void createAllFilters() { - int i; - - for (i = 0; i < FracCount; i++) { - getFilter(i); - } - } - - /** - * Function returns an approximate initialization complexity, expressed in - * the number of multiply-add operations. This includes fractional delay - * filters calculation and multiplication by an external filter. This - * function can only be called after the init() function. - * - * @param FracUseMap Fractional delays use map, each element corresponds - * to a single fractional delay, will be compared to the internal table - * fill flags. This map should include 0 and 1 values only. - * @return The complexity of the initialization, expressed in the number - * of multiply-add operations. - */ - - int calcInitComplexity(const CBuffer& FracUseMap) const { - const int FltInitCost = 65; // Cost to initialize a single sample - // of the fractional delay filter. - const int FltUseCost = - FilterLen * Order + - SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single - // fractional delay filter. - const int ucb[2] = {0, FltUseCost}; - int ic; - int i; - - if (IsInitRequired) { - ic = FracCount * SrcFilterLen * FltInitCost; - - for (i = 0; i < FracCount; i++) { - ic += ucb[FracUseMap[i]]; - } - } else { - ic = 0; - - for (i = 0; i < FracCount; i++) { - if (FracUseMap[i] != 0) { - ic += ucb[TableFillFlags[i] == 0 ? 
1 : 0]; - } - } - } - - return (ic); - } - - private: - static const int InterpPoints = 2; ///< The maximal number of points the - ///< interpolation is based on. - ///< - double WFLen2; ///< Window function's Len2 parameter. - ///< - double WFFreq; ///< Window function's Freq parameter. - ///< - double WFAlpha; ///< Window function's Alpha parameter. - ///< - int FracCount; ///< The required number of fractional delay filters. - ///< - int Order; ///< The order of the interpolation polynomial. - ///< - int Alignment; ///< The required filter table alignment. - ///< - int SrcFilterLen; ///< Length of the "source" filters. This is always an - ///< even value. - ///< - int FilterLen; ///< Specifies the number of samples (taps) each fractional - ///< delay filter has. This is always an even value, adjusted - ///< by the FltLenAlign. - ///< - int FilterSize; ///< The size of a single filter element, equals - ///< FilterLen * ElementSize. - ///< - bool IsInitRequired; ///< "True" if SrcTable filter table initialization - ///< is required. This value is available only after the - ///< call to the init() function. - ///< - CBuffer Table; ///< Interpolation table, size equals to - ///< ReqFracCount * FilterLen * ElementSize. - ///< - CBuffer - TableFillFlags; ///< Contains ReqFracCount + 1 - ///< elements. Bit 0 of every element is 1 if Table - ///< already contains the filter from SrcTable filtered - ///< by ExtFilter. Bit 1 of every element means higher - ///< order coefficients were filled for the filter. - ///< - CFltBuffer ExtFilter; ///< External filter that should be applied to every - ///< fractional delay filter. Can be empty. Half of - ///< this filter's capacity is used as latency (group - ///< delay) value of the filter. - ///< - CBuffer SrcTable; ///< Source table of delay filters, contains - ///< ReqFracCount + 1 elements. This table is used - ///< to fill the Table with the actual filters, - ///< filtered by an external filter. 
- ///< - bool IsSrcTableBuilt; ///< "True" if the SrcTable was built already. This - ///< variable is set to "false" in the init() function. - ///< - - /** - * Function builds source table used in the createFilter() function. - */ - - void buildSrcTable() { - IsSrcTableBuilt = true; - IsInitRequired = false; - - CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha); - - const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1; - const int BufOffs = InterpPoints / 2 - 1; - const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs; - - CBuffer Buf(BufLen); - memset(Buf, 0, (BufCenter - p.fl2) * sizeof(double)); - int i = BufLen - BufCenter - p.fl2 - 1; - memset(&Buf[BufLen - i], 0, i * sizeof(double)); - - p.generateLPF(&Buf[BufCenter - p.fl2], FracCount); - abort(); - - SrcTable.alloc((FracCount + 1) * SrcFilterLen); - TableFillFlags.alloc(FracCount + 1); - int j; - double* op0 = SrcTable; - - for (i = FracCount; i >= 0; i--) { - TableFillFlags[i] = 0; - double* p = Buf + BufOffs + i; - - for (j = 0; j < SrcFilterLen; j++) { - op0[0] = p[0]; - op0++; - p += FracCount; - } - } - - Table.alloc((FracCount + 1) * FilterSize, Alignment); - } - - /** - * Function creates the specified filter in the Table by copying it from - * the SrcTable and filtering by ExtFilter. Function does nothing if - * filter was already created. - * - * @param k Filter index to create, in the range 0 to FracCount, - * inclusive. 
- */ - - void createFilter(const int k) { - if (TableFillFlags[k] != 0) { - return; - } - - TableFillFlags[k] |= 1; - const int ExtFilterLatency = ExtFilter.getCapacity() / 2; - const int ResLatency = ExtFilterLatency + SrcFilterLen / 2; - int ResLen = SrcFilterLen; - - if (ExtFilter.getCapacity() > 0) { - ResLen += ExtFilter.getCapacity() - 1; - } - - const int ResOffs = FilterLen / 2 - ResLatency; - fptype* op = &Table[k * FilterSize]; - int i; - - for (i = 0; i < ResOffs; i++) { - op[i] = 0.0; - } - - for (i = ResOffs + ResLen; i < FilterLen; i++) { - op[i] = 0.0; - } - - op += ResOffs; - const double* const srcflt = &SrcTable[k * SrcFilterLen]; - - if (ExtFilter.getCapacity() == 0) { - for (i = 0; i < ResLen; i++) { - op[i] = (fptype)srcflt[i]; - } - - return; - } - - // Perform convolution of extflt and srcflt. - - const double* const extflt = &ExtFilter[0]; - int j; - - for (j = 0; j < ResLen; j++) { - int k = 0; - int l = j - ExtFilter.getCapacity() + 1; - int r = l + ExtFilter.getCapacity(); - - if (l < 0) { - k -= l; - l = 0; - } - - if (r > SrcFilterLen) { - r = SrcFilterLen; - } - - const double* const extfltb = extflt + k; - const double* const srcfltb = srcflt + l; - double s = 0.0; - l = r - l; - - for (i = 0; i < l; i++) { - s += extfltb[i] * srcfltb[i]; - } - - op[j] = (fptype)s; - } - } -}; - -/** - * @brief Thread pool for multi-threaded image resizing operation. - * - * This base class is used to organize a multi-threaded image resizing - * operation. The thread pool should consist of threads that initially wait - * for a signal. Upon receiving a signal (via the startAllWorkloads() - * function) each previously added thread should execute its workload's - * process() function once, and return to the wait signal state again. The - * thread pool should be also able to efficiently wait for all workloads to - * finish via the waitAllWorkloadsToFinish() function. - * - * The image resizing algorithm makes calls to functions of this class. 
- */ - -class CImageResizerThreadPool { - public: - CImageResizerThreadPool() {} - - virtual ~CImageResizerThreadPool() {} - - /** - * @brief Thread pool's workload object class. - * - * This class should be used as a base class for objects that perform the - * actual work spread over several threads. - */ - - class CWorkload { - public: - virtual ~CWorkload() {} - - /** - * Function that gets called from the thread when thread pool's - * startAllWorkloads() function is called. - */ - - virtual void process() = 0; - }; - - /** - * @return The suggested number of workloads (and their associated - * threads) to add. The minimal value this function can return is 1. The - * usual value may depend on the number of physical and virtual cores - * present in the system, and on other considerations. - */ - - virtual int getSuggestedWorkloadCount() const { return (1); } - - /** - * Function adds a new workload (and possibly thread) to the thread pool. - * The caller decides how many parallel workloads (and threads) it - * requires, but this number will not exceed the value returned by the - * getSuggestedWorkloadCount() function. It is implementation-specific how - * many workloads to associate with a single thread. But for efficiency - * reasons each workload should be associated with its own thread. - * - * Note that the same set of workload objects will be processed each time - * the startAllWorkloads() function is called. This means that workload - * objects are added only once. The caller changes the state of the - * workload objects and then calls the startAllWorkloads() function to - * process them. - * - * @param Workload Workload object whose process() function will be called - * from within the thread when the startAllWorkloads() function is called. - */ - - virtual void addWorkload(CWorkload* const Workload) {} - - /** - * Function starts all workloads associated with threads previously added - * via the addWorkload() function. 
It is assumed that this function - * performs the necessary "memory barrier" (or "cache sync") kind of - * operation so that all threads catch up the prior changes made to the - * workload objects during their wait state. - */ - - virtual void startAllWorkloads() {} - - /** - * Function waits for all workloads to finish. - */ - - virtual void waitAllWorkloadsToFinish() {} - - /** - * Function removes all workloads previously added via the addWorkload() - * function. This function gets called only after the - * waitAllWorkloadsToFinish() function call. - */ - - virtual void removeAllWorkloads() {} -}; - -/** - * @brief Resizing algorithm parameters structure. - * - * This structure holds all selectable parameters used by the resizing - * algorithm at various stages, for both downsizing and upsizing. There are no - * other parameters exist that can optimize the performance of the resizing - * algorithm. Filter length parameters can take fractional values. - * - * Beside quality, these parameters (except Alpha parameters) directly affect - * the computative cost of the resizing algorithm. It is possible to trade - * the visual quality for computative cost. - * - * Anti-alias filtering during downsizing can be defined as a considerable - * reduction of contrast of smallest features of an image. Unfortunately, such - * de-contrasting partially affects features of all sizes thus producing a - * non-linearity of frequency response. All pre-defined parameter sets are - * described by 3 values separated by slashes. The first value is the - * de-contrasting factor of small features (which are being removed) while - * the second value is the de-contrasting factor of large features (which - * should remain intact), with value of 1 equating to "no contrast change". - * The third value is the optimization score (see below), with value of 0 - * equating to the "perfect" linearity of frequency response. 
- * - * The pre-defined parameter sets offered by this library were auto-optimized - * for the given LPFltBaseLen, IntFltLen and CorrFltAlpha values. The - * optimization goal was to minimize the score: the sum of squares of the - * difference between original and processed images (which was not actually - * resized, k=1). The original image was a 0.5 megapixel uniformly-distributed - * white-noise image with pixel intensities in the 0-1 range. Such goal - * converges very well and produces filtering system with the flattest - * frequency response possible for the given constraints. With this goal, - * increasing the LPFltBaseLen value reduces the general amount of aliasing - * artifacts. - */ - -struct CImageResizerParams { - double CorrFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the correction filter. The - ///< "usable" values are in the narrow range 1.0 to 1.5. - ///< - double CorrFltLen; ///< Correction filter's length in samples (taps). The - ///< "usable" range is narrow, 5.5 to 8, as to minimize - ///< the "overcorrection" which is mathematically precise, - ///< but visually unacceptable. - ///< - double IntFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the interpolation low-pass filter. - ///< The "usable" values are in the range 1.5 to 2.5. - ///< - double IntFltCutoff; ///< Interpolation low-pass filter's cutoff frequency - ///< (normalized, [0; 1]). The "usable" range is 0.6 to - ///< 0.8. - ///< - double IntFltLen; ///< Interpolation low-pass filter's length in samples - ///< (taps). The length value should be at least 18 or - ///< otherwise a "dark grid" artifact will be introduced if - ///< a further sharpening is applied. IntFltLen together - ///< with other IntFlt parameters should be tuned in a way - ///< that produces the flattest frequency response in 0-0.5 - ///< normalized frequency range (this range is due to 2X - ///< upsampling). 
- ///< - double LPFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the low-pass filter. The "usable" - ///< values are in the range 1.5 to 6.5. - ///< - double LPFltBaseLen; ///< Base length of the low-pass (aka anti-aliasing - ///< or reconstruction) filter, in samples (taps), - ///< further adjusted by the actual cutoff frequency, - ///< upsampling and downsampling factors. The "usable" - ///< range is between 6 and 9. - ///< - double LPFltCutoffMult; ///< Low-pass filter's cutoff frequency - ///< multiplier. This value can be both below and - ///< above 1.0 as low-pass filters are inserted on - ///< downsampling and upsampling steps and always - ///< have corner frequency equal to or below 0.5pi. - ///< This multiplier shifts low-pass filter's corner - ///< frequency towards lower (if below 1.0) or higher - ///< (if above 1.0) frequencies. This multiplier can - ///< be way below 1.0 since any additional - ///< high-frequency damping will be partially - ///< corrected by the correction filter. The "usable" - ///< range is 0.3 to 1.0. - ///< - - CImageResizerParams() - : HBFltAlpha(1.75395), HBFltCutoff(0.40356), HBFltLen(22.00000) {} - - double HBFltAlpha; ///< Half-band filter's Alpha. Assigned internally. - ///< - double HBFltCutoff; ///< Half-band filter's cutoff point [0; 1]. Assigned - ///< internally. - ///< - double HBFltLen; ///< Length of the half-band low-pass filter. Assigned - ///< internally. Internally used to perform 2X or higher - ///< downsampling. These filter parameters should be treated - ///< as "technical" and do not require adjustment as they - ///< were tuned to suit all combinations of other - ///< parameters. This half-band filter provides a wide - ///< transition band (for minimal ringing artifacts) and a - ///< high stop-band attenuation (for minimal aliasing). - ///< -}; - -/** - * @brief The default set of resizing algorithm parameters - * (10.01/1.029/0.019169). 
- * - * This is the default set of resizing parameters that was designed to deliver - * a sharp image while still providing a low amount of ringing artifacts, and - * having a reasonable computational cost. - */ - -struct CImageResizerParamsDef : public CImageResizerParams { - CImageResizerParamsDef() { - CorrFltAlpha = 1.0; // 10.01/1.88/1.029(522.43)/0.019169:258648,446808 - CorrFltLen = 6.30770; - IntFltAlpha = 2.27825; - IntFltCutoff = 0.75493; - IntFltLen = 18.0; - LPFltAlpha = 3.40127; - LPFltBaseLen = 7.78; - LPFltCutoffMult = 0.78797; - } -}; - -/** - * @brief Set of resizing algorithm parameters for ultra-low-ringing - * performance (7.69/1.069/0.000245). - * - * This set of resizing algorithm parameters offers the lowest amount of - * ringing this library is capable of providing while still offering a decent - * quality. Low ringing is attained at the expense of higher aliasing - * artifacts and a slightly reduced contrast. - */ - -struct CImageResizerParamsULR : public CImageResizerParams { - CImageResizerParamsULR() { - CorrFltAlpha = 1.0; // 7.69/1.97/1.069(31445.45)/0.000245:258627,436845 - CorrFltLen = 5.83280; - IntFltAlpha = 2.11453; - IntFltCutoff = 0.73986; - IntFltLen = 18.0; - LPFltAlpha = 1.73455; - LPFltBaseLen = 6.40; - LPFltCutoffMult = 0.61314; - } -}; - -/** - * @brief Set of resizing algorithm parameters for low-ringing performance - * (7.86/1.065/0.000106). - * - * This set of resizing algorithm parameters offers a very low-ringing - * performance at the expense of higher aliasing artifacts and a slightly - * reduced contrast. 
- */ - -struct CImageResizerParamsLR : public CImageResizerParams { - CImageResizerParamsLR() { - CorrFltAlpha = 1.0; // 7.86/1.96/1.065(73865.02)/0.000106:258636,437381 - CorrFltLen = 5.87671; - IntFltAlpha = 2.25322; - IntFltCutoff = 0.74090; - IntFltLen = 18.0; - LPFltAlpha = 1.79306; - LPFltBaseLen = 7.00; - LPFltCutoffMult = 0.68881; - } -}; - -/** - * @brief Set of resizing algorithm parameters for lower-ringing performance - * (8.86/1.046/0.010168). - * - * This set of resizing algorithm parameters offers a lower-ringing - * performance in comparison to the default setting, at the expense of higher - * aliasing artifacts and a slightly reduced contrast. - */ - -struct CImageResizerParamsLow : public CImageResizerParams { - CImageResizerParamsLow() { - CorrFltAlpha = 1.0; // 8.86/1.92/1.046(871.54)/0.010168:258647,442252 - CorrFltLen = 6.09757; - IntFltAlpha = 2.36704; - IntFltCutoff = 0.74674; - IntFltLen = 18.0; - LPFltAlpha = 2.19427; - LPFltBaseLen = 7.66; - LPFltCutoffMult = 0.75380; - } -}; - -/** - * @brief Set of resizing algorithm parameters for low-aliasing - * resizing (11.81/1.012/0.038379). - * - * This set of resizing algorithm parameters offers a considerable - * anti-aliasing performance with a good frequency response linearity (and - * contrast). This is an intermediate setting between the default and Ultra - * parameters. - */ - -struct CImageResizerParamsHigh : public CImageResizerParams { - CImageResizerParamsHigh() { - CorrFltAlpha = 1.0; // 11.81/1.83/1.012(307.84)/0.038379:258660,452719 - CorrFltLen = 6.80909; - IntFltAlpha = 2.44917; - IntFltCutoff = 0.75856; - IntFltLen = 18.0; - LPFltAlpha = 4.39527; - LPFltBaseLen = 8.18; - LPFltCutoffMult = 0.79172; - } -}; - -/** - * @brief Set of resizing algorithm parameters for ultra low-aliasing - * resizing (13.65/1.001/0.000483). 
- * - * This set of resizing algorithm parameters offers a very considerable - * anti-aliasing performance with a good frequency response linearity (and - * contrast). This set of parameters is computationally expensive and may - * produce ringing artifacts on sharp features. - */ - -struct CImageResizerParamsUltra : public CImageResizerParams { - CImageResizerParamsUltra() { - CorrFltAlpha = 1.0; // 13.65/1.79/1.001(28288.41)/0.000483:258658,457974 - CorrFltLen = 7.48060; - IntFltAlpha = 1.93750; - IntFltCutoff = 0.75462; - IntFltLen = 18.0; - LPFltAlpha = 5.55209; - LPFltBaseLen = 8.34; - LPFltCutoffMult = 0.78002; - } -}; - -/** - * @brief Image resizing variables class. - * - * This is an utility "catch all" class that defines various variables used - * during image resizing. Several variables that are explicitly initialized in - * this class' constructor are also used as additional "input" variables to - * the image resizing function. These variables will not be changed by the - * avir::CImageResizer<>::resizeImage() function. - */ - -class CImageResizerVars { - public: - int ElCount; ///< The number of "fptype" elements used to store 1 pixel. - ///< - int ElCountIO; ///< The number of source and destination image's elements - ///< used to store 1 pixel. - ///< - int fppack; ///< The number of atomic types stored in a single "fptype" - ///< element. - ///< - int fpalign; ///< Suggested alignment size in bytes. This is not a - ///< required alignment, because image resizing algorithm cannot - ///< be made to have a strictly aligned data access in all cases - ///< (e.g. de-interleaved interpolation cannot perform aligned - ///< accesses). - ///< - int elalign; ///< Length alignment of arrays of elements. This applies to - ///< filters and intermediate buffers: this constant forces - ///< filters and scanlines to have a length which is a multiple - ///< of this value, for more efficient SIMD implementation. 
- ///< - int packmode; ///< 0 if interleaved packing, 1 if de-interleaved. - ///< - int BufLen[2]; ///< Intermediate buffers' lengths in "fptype" elements. - int BufOffs[2]; ///< Offsets into the intermediate buffers, used to - ///< provide prefix elements required during processing so - ///< that no "out of range" access happens. This offset is a - ///< multiple of ElCount if pixels are stored in interleaved - ///< form. - ///< - double k; ///< Resizing step coefficient, updated to reflect the actually - ///< used coefficient during resizing. - ///< - double o; ///< Starting pixel offset inside the source image, updated to - ///< reflect the actually used offset during resizing. - ///< - int ResizeStep; ///< Index of the resizing step in the latest filtering - ///< steps array. - ///< - double InGammaMult; ///< Input gamma multiplier, used to convert input - ///< data to 0 to 1 range. 0.0 if no gamma is in use. - ///< - double OutGammaMult; ///< Output gamma multiplier, used to convert data to - ///< 0 to 255/65535 range. 0.0 if no gamma is in use. - ///< - - double ox; ///< Start X pixel offset within source image (can be - ///< negative). Positive offset moves image to the left. - ///< - double oy; ///< Start Y pixel offset within source image (can be - ///< negative). Positive offset moves image to the top. - ///< - CImageResizerThreadPool* - ThreadPool; ///< Thread pool to be used by the - ///< image resizing function. Set to NULL to use - ///< single-threaded processing. - ///< - bool UseSRGBGamma; ///< Perform sRGB gamma linearization (correction). - ///< - int BuildMode; ///< The build mode to use, for debugging purposes. Set to - ///< -1 to select a minimal-complexity mode automatically. All - ///< build modes deliver similar results with minor - ///< deviations. - ///< - int RndSeed; ///< Random seed parameter. This parameter may be incremented - ///< after each random generator initialization. 
The use of this - ///< variable depends on the ditherer implementation. - ///< - - CImageResizerVars() - : ox(0.0), - oy(0.0), - ThreadPool(NULL), - UseSRGBGamma(false), - BuildMode(-1), - RndSeed(0) {} -}; - -/** - * @brief Image resizer's filtering step class. - * - * Class defines data to perform a single filtering step over a whole - * horizontal or vertical scanline. Resizing consists of 1 or more steps that - * may be performed before the actual resizing takes place. Filtering may also - * follow a resizing step. Each step must ensure that scanline data contains - * enough pixels to perform the next step (which may be resizing) without - * exceeding scanline's bounds. - * - * A derived class must implement several "const" and "static" functions that - * are used to perform the actual filtering in interleaved or de-interleaved - * mode. - * - * @tparam fptype Floating point type to use for storing pixel elements. SIMD - * types can be used: in this case each element may hold a whole pixel. - * @tparam fptypeatom The atomic type the "fptype" consists of. - */ - -template -class CImageResizerFilterStep { - public: - bool IsUpsample; ///< "True" if this step is an upsampling step, "false" - ///< if downsampling step. Should be set to "false" if - ///< ResampleFactor equals 0. - ///< - int ResampleFactor; ///< Resample factor (>=1). If 0, this is a resizing - ///< step. This value should be >1 if IsUpsample equals - ///< "true". - ///< - CBuffer Flt; ///< Filter to use at this step. - ///< - CFltBuffer FltOrig; ///< Originally-designed filter. This buffer may not - ///< be assigned. Assigned by filters that precede the - ///< resizing step if such filter is planned to be - ///< embedded into the interpolation filter as "external" - ///< filter. If IsUpsample=true and this filter buffer is - ///< not empty, the upsampling step will not itself apply - ///< any filtering over upsampled input scanline. 
- ///< - double DCGain; ///< DC gain which was applied to the filter. Not defined - ///< if ResampleFactor = 0. - ///< - int FltLatency; ///< Filter's latency (group delay, shift) in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int InLen; ///< Input scanline's length in pixels. - ///< - int InBuf; ///< Input buffer index, 0 or 1. - ///< - int InPrefix; ///< Required input prefix pixels. These prefix pixels will - ///< be filled with source scanline's first pixel value. If - ///< IsUpsample is "true", this is the additional number of - ///< times the first pixel will be filtered before processing - ///< scanline, this number is also reflected in the OutPrefix. - ///< - int InSuffix; ///< Required input suffix pixels. These suffix pixels will - ///< be filled with source scanline's last pixel value. If - ///< IsUpsample is "true", this is the additional number of - ///< times the last pixel will be filtered before processing - ///< scanline, this number is also reflected in the OutSuffix. - ///< - int InElIncr; ///< Pixel element increment within the input buffer, used - ///< during de-interleaved processing: in this case each - ///< image's channel is stored independently, InElIncr elements - ///< apart. - ///< - int OutLen; ///< Length of the resulting scanline. - ///< - int OutBuf; ///< Output buffer index. 0 or 1; 2 for the last step. - ///< - int OutPrefix; ///< Required output prefix pixels. These prefix pixels - ///< will not be pre-filled with any values. Value is valid - ///< only if IsUpsample equals "true". - ///< - int OutSuffix; ///< Required input suffix pixels. These suffix pixels will - ///< not be pre-filled with any values. Value is valid only if - ///< IsUpsample equals "true". - ///< - int OutElIncr; ///< Pixel element increment within the output buffer, used - ///< during de-interleaved processing. Equals to the - ///< InBufElIncr of the next step. 
- ///< - CBuffer PrefixDC; ///< DC component fluctuations added at the - ///< start of the resulting scanline, used when - ///< IsUpsample equals "true". - ///< - CBuffer SuffixDC; ///< DC component fluctuations added at the - ///< end of the resulting scanline, used when - ///< IsUpsample equals "true". - ///< - int EdgePixelCount; ///< The number of edge pixels added. Affects the - ///< initial position within the input scanline, used to - ///< produce edge pixels. This variable is used and - ///< should be defined when IsUpsample=false and - ///< ResampleFactor>0. When assigning this variable it is - ///< also necessary to update InPrefix, OutLen and Vars.o - ///< variables. - ///< - static const int EdgePixelCountDef = - 3; ///< The default number of pixels - ///< additionally produced at scanline edges during filtering. This is - ///< required to reduce edge artifacts. - ///< - - /** - * @brief Resizing position structure. - * - * Structure holds resizing position and pointer to fractional delay - * filter. - */ - - struct CResizePos { - int SrcPosInt; ///< Source scanline position. - ///< - int fti; ///< Fractional delay filter index. - ///< - const fptype* ftp; ///< Fractional delay filter pointer. - ///< - fptypeatom x; ///< Interpolation coefficient between delay filters. - ///< - int SrcOffs; ///< Source scanline offset. - ///< - }; - - /** - * @brief Resizing positions buffer class. - * - * This class combines buffer together with variables that define resizing - * stepping. - */ - - class CRPosBuf : public CBuffer { - public: - double k; ///< Resizing step. - ///< - double o; ///< Resizing offset. - ///< - int FracCount; ///< The number of fractional delay filters in a filter - ///< bank used together with this buffer. - ///< - }; - - /** - * @brief Resizing positions buffer array class. - * - * This class combines structure array of the CRPosBuf class objects with - * the function that locates or creates buffer with the required resizing - * stepping. 
- */ - - class CRPosBufArray : public CStructArray { - public: - using CStructArray::add; - using CStructArray::getItemCount; - - /** - * Function returns the resizing positions buffer with the required - * stepping. If no such buffer exists, it is created. - * - * @param k Resizing step. - * @param o Resizing offset. - * @param FracCount The number of fractional delay filters in a filter - * bank used together with this buffer. - * @return Reference to the CRPosBuf object. - */ - - CRPosBuf& getRPosBuf(const double k, const double o, const int FracCount) { - int i; - - for (i = 0; i < getItemCount(); i++) { - CRPosBuf& Buf = (*this)[i]; - - if (Buf.k == k && Buf.o == o && Buf.FracCount == FracCount) { - return (Buf); - } - } - - CRPosBuf& NewBuf = add(); - NewBuf.k = k; - NewBuf.o = o; - NewBuf.FracCount = FracCount; - - return (NewBuf); - } - }; - - CRPosBuf* RPosBuf; ///< Resizing positions buffer. Used when - ///< ResampleFactor equals 0 (resizing step). - ///< - CDSPFracFilterBankLin* FltBank; ///< Filter bank in use by *this - ///< resizing step. - ///< -}; - -/** - * @brief Interleaved filtering steps implementation class. - * - * This class implements scanline filtering functions in interleaved mode. - * This means that each pixel is processed independently, not in groups. - * - * @tparam fptype Floating point type to use for storing pixel elements. SIMD - * types can be used: in this case each element may hold a whole pixel. - * @tparam fptypeatom The atomic type the "fptype" consists of. 
- */ - -template -class CImageResizerFilterStepINL - : public CImageResizerFilterStep { - public: - using CImageResizerFilterStep::IsUpsample; - using CImageResizerFilterStep::ResampleFactor; - using CImageResizerFilterStep::Flt; - using CImageResizerFilterStep::FltOrig; - using CImageResizerFilterStep::FltLatency; - using CImageResizerFilterStep::Vars; - using CImageResizerFilterStep::InLen; - using CImageResizerFilterStep::InPrefix; - using CImageResizerFilterStep::InSuffix; - using CImageResizerFilterStep::OutLen; - using CImageResizerFilterStep::OutPrefix; - using CImageResizerFilterStep::OutSuffix; - using CImageResizerFilterStep::PrefixDC; - using CImageResizerFilterStep::SuffixDC; - using CImageResizerFilterStep::RPosBuf; - using CImageResizerFilterStep::FltBank; - using CImageResizerFilterStep::EdgePixelCount; - - /** - * Function performs "packing" of a scanline and type conversion. - * Scanline, depending on the "fptype" can be potentially stored as a - * packed SIMD values having a certain atomic type. If required, the sRGB - * gamma correction is applied. - * - * @param ip Input scanline. - * @param op0 Output scanline. - * @param l0 The number of pixels to "pack". 
- */ - - template - void packScanline(const Tin* ip, fptype* const op0, const int l0) const { - const int ElCount = Vars->ElCount; - const int ElCountIO = Vars->ElCountIO; - fptype* op = op0; - int l = l0; - - if (!Vars->UseSRGBGamma) { - if (ElCountIO == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - op += ElCount; - ip++; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - v[1] = (fptypeatom)ip[1]; - v[2] = (fptypeatom)ip[2]; - v[3] = (fptypeatom)ip[3]; - op += ElCount; - ip += 4; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - v[1] = (fptypeatom)ip[1]; - v[2] = (fptypeatom)ip[2]; - op += ElCount; - ip += 3; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = (fptypeatom)ip[0]; - v[1] = (fptypeatom)ip[1]; - op += ElCount; - ip += 2; - l--; - } - } - } else { - const fptypeatom gm = (fptypeatom)Vars->InGammaMult; - - if (ElCountIO == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - op += ElCount; - ip++; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm); - v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm); - v[3] = convertSRGB2Lin((fptypeatom)ip[3] * gm); - op += ElCount; - ip += 4; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm); - v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm); - op += ElCount; - ip += 3; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op; - v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm); - v[1] = 
convertSRGB2Lin((fptypeatom)ip[1] * gm); - op += ElCount; - ip += 2; - l--; - } - } - } - - const int ZeroCount = ElCount * Vars->fppack - ElCountIO; - op = op0; - l = l0; - - if (ZeroCount == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op + ElCountIO; - v[0] = (fptypeatom)0; - op += ElCount; - l--; - } - } else if (ZeroCount == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op + ElCountIO; - v[0] = (fptypeatom)0; - v[1] = (fptypeatom)0; - op += ElCount; - l--; - } - } else if (ZeroCount == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)op + ElCountIO; - v[0] = (fptypeatom)0; - v[1] = (fptypeatom)0; - v[2] = (fptypeatom)0; - op += ElCount; - l--; - } - } - } - - /** - * Function applies Linear to sRGB gamma correction to the specified - * scanline. - * - * @param p Scanline. - * @param l The number of pixels to de-linearize. - * @param Vars0 Image resizing-related variables. - */ - - static void applySRGBGamma(fptype* p, int l, const CImageResizerVars& Vars0) { - const int ElCount = Vars0.ElCount; - const int ElCountIO = Vars0.ElCountIO; - const fptypeatom gm = (fptypeatom)Vars0.OutGammaMult; - - if (ElCountIO == 1) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - p += ElCount; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - v[1] = convertLin2SRGB(v[1]) * gm; - v[2] = convertLin2SRGB(v[2]) * gm; - v[3] = convertLin2SRGB(v[3]) * gm; - p += ElCount; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - v[1] = convertLin2SRGB(v[1]) * gm; - v[2] = convertLin2SRGB(v[2]) * gm; - p += ElCount; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - fptypeatom* v = (fptypeatom*)p; - v[0] = convertLin2SRGB(v[0]) * gm; - v[1] = convertLin2SRGB(v[1]) * gm; - p += ElCount; - l--; - } - } - } - - /** - * Function converts vertical scanline to 
horizontal scanline. This - * function is called by the image resizer when image is resized - * vertically. This means that the vertical scanline is stored in the - * same format produced by the packScanline() and maintained by other - * filtering functions. - * - * @param ip Input vertical scanline. - * @param op Output buffer (temporary buffer used during resizing). - * @param SrcLen The number of pixels in the input scanline, also used to - * calculate input buffer increment. - * @param SrcIncr Input buffer increment to the next vertical pixel. - */ - - void convertVtoH(const fptype* ip, fptype* op, const int SrcLen, - const int SrcIncr) const { - const int ElCount = Vars->ElCount; - int j; - - if (ElCount == 1) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - ip += SrcIncr; - op++; - } - } else if (ElCount == 4) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - op[3] = ip[3]; - ip += SrcIncr; - op += 4; - } - } else if (ElCount == 3) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - op[1] = ip[1]; - op[2] = ip[2]; - ip += SrcIncr; - op += 3; - } - } else if (ElCount == 2) { - for (j = 0; j < SrcLen; j++) { - op[0] = ip[0]; - op[1] = ip[1]; - ip += SrcIncr; - op += 2; - } - } - } - - /** - * Function performs "unpacking" of a scanline and type conversion - * (truncation is used when floating point is converted to integer). - * Scanline, depending on the "fptype" can be potentially stored as a - * packed SIMD values having a certain atomic type. The unpacking function - * assumes that scanline is stored in the style produced by the - * packScanline() function. - * - * @param ip Input scanline. - * @param op Output scanline. - * @param l The number of pixels to "unpack". - * @param Vars0 Image resizing-related variables. 
- */ - - template - static void unpackScanline(const fptype* ip, Tout* op, int l, - const CImageResizerVars& Vars0) { - const int ElCount = Vars0.ElCount; - const int ElCountIO = Vars0.ElCountIO; - - if (ElCountIO == 1) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - ip += ElCount; - op++; - l--; - } - } else if (ElCountIO == 4) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - op[1] = (Tout)v[1]; - op[2] = (Tout)v[2]; - op[3] = (Tout)v[3]; - ip += ElCount; - op += 4; - l--; - } - } else if (ElCountIO == 3) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - op[1] = (Tout)v[1]; - op[2] = (Tout)v[2]; - ip += ElCount; - op += 3; - l--; - } - } else if (ElCountIO == 2) { - while (l > 0) { - const fptypeatom* v = (const fptypeatom*)ip; - op[0] = (Tout)v[0]; - op[1] = (Tout)v[1]; - ip += ElCount; - op += 2; - l--; - } - } - } - - /** - * Function prepares input scanline buffer for *this filtering step. - * Left- and right-most pixels are replicated to make sure no buffer - * overrun happens. Such approach also allows to bypass any pointer - * range checks. - * - * @param Src Source buffer. - */ - - void prepareInBuf(fptype* Src) const { - if (IsUpsample || InPrefix + InSuffix == 0) { - return; - } - - const int ElCount = Vars->ElCount; - replicateArray(Src, ElCount, Src - ElCount, InPrefix, -ElCount); - - Src += (InLen - 1) * ElCount; - replicateArray(Src, ElCount, Src + ElCount, InSuffix, ElCount); - } - - /** - * Function peforms scanline upsampling with filtering. - * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. 
- */ - - void doUpsample(const fptype* const Src, fptype* const Dst) const { - const int ElCount = Vars->ElCount; - fptype* op0 = &Dst[-OutPrefix * ElCount]; - memset(op0, 0, (OutPrefix + OutLen + OutSuffix) * ElCount * sizeof(fptype)); - - const fptype* ip = Src; - const int opstep = ElCount * ResampleFactor; - int l; - - if (FltOrig.getCapacity() > 0) { - // Do not perform filtering, only upsample. - - op0 += (OutPrefix % ResampleFactor) * ElCount; - l = OutPrefix / ResampleFactor; - - if (ElCount == 1) { - while (l > 0) { - op0[0] = ip[0]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0 += opstep; - l--; - } - } else if (ElCount == 4) { - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0[3] = ip[3]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0[3] = ip[3]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0[3] = ip[3]; - op0 += opstep; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0[2] = ip[2]; - op0 += opstep; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while (l >= 0) { - op0[0] = ip[0]; - op0[1] = ip[1]; - op0 += opstep; - l--; - } 
- } - - return; - } - - const fptype* const f = Flt; - const int flen = Flt.getCapacity(); - fptype* op; - int i; - - if (ElCount == 1) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[i] += f[i] * ip[0]; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[i] += f[i] * ip[0]; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[i] += f[i] * ip[0]; - } - - op0 += opstep; - l--; - } - } else if (ElCount == 4) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op[3] += f[i] * ip[3]; - op += 4; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op[3] += f[i] * ip[3]; - op += 4; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op[3] += f[i] * ip[3]; - op += 4; - } - - op0 += opstep; - l--; - } - } else if (ElCount == 3) { - l = InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op += 3; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op += 3; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op[2] += f[i] * ip[2]; - op += 3; - } - - op0 += opstep; - l--; - } - } else if (ElCount == 2) { - l 
= InPrefix; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op += 2; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while (l > 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op += 2; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while (l >= 0) { - op = op0; - - for (i = 0; i < flen; i++) { - op[0] += f[i] * ip[0]; - op[1] += f[i] * ip[1]; - op += 2; - } - - op0 += opstep; - l--; - } - } - - op = op0; - const fptype* dc = SuffixDC; - l = SuffixDC.getCapacity(); - - if (ElCount == 1) { - for (i = 0; i < l; i++) { - op[i] += ip[0] * dc[i]; - } - } else if (ElCount == 4) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - op[3] += ip[3] * dc[0]; - dc++; - op += 4; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - dc++; - op += 3; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - dc++; - op += 2; - l--; - } - } - - ip = Src; - op = Dst - InPrefix * opstep; - dc = PrefixDC; - l = PrefixDC.getCapacity(); - - if (ElCount == 1) { - for (i = 0; i < l; i++) { - op[i] += ip[0] * dc[i]; - } - } else if (ElCount == 4) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - op[3] += ip[3] * dc[0]; - dc++; - op += 4; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - op[2] += ip[2] * dc[0]; - dc++; - op += 3; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - op[0] += ip[0] * dc[0]; - op[1] += ip[1] * dc[0]; - dc++; - op += 2; - l--; - } - } - } - - /** - * Function peforms scanline filtering with optional downsampling. - * Function makes use of the symmetry of the filter. 
- * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. - * @param DstIncr Destination scanline buffer increment, used for - * horizontal or vertical scanline stepping. - */ - - void doFilter(const fptype* const Src, fptype* Dst, const int DstIncr) const { - const int ElCount = Vars->ElCount; - const fptype* const f = &Flt[FltLatency]; - const int flen = FltLatency + 1; - const int ipstep = ElCount * ResampleFactor; - const fptype* ip = Src - EdgePixelCount * ipstep; - const fptype* ip1; - const fptype* ip2; - int l = OutLen; - int i; - - if (ElCount == 1) { - while (l > 0) { - fptype s = f[0] * ip[0]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1++; - ip2--; - s += f[i] * (ip1[0] + ip2[0]); - } - - Dst[0] = s; - Dst += DstIncr; - ip += ipstep; - l--; - } - } else if (ElCount == 4) { - while (l > 0) { - fptype s1 = f[0] * ip[0]; - fptype s2 = f[0] * ip[1]; - fptype s3 = f[0] * ip[2]; - fptype s4 = f[0] * ip[3]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1 += 4; - ip2 -= 4; - s1 += f[i] * (ip1[0] + ip2[0]); - s2 += f[i] * (ip1[1] + ip2[1]); - s3 += f[i] * (ip1[2] + ip2[2]); - s4 += f[i] * (ip1[3] + ip2[3]); - } - - Dst[0] = s1; - Dst[1] = s2; - Dst[2] = s3; - Dst[3] = s4; - Dst += DstIncr; - ip += ipstep; - l--; - } - } else if (ElCount == 3) { - while (l > 0) { - fptype s1 = f[0] * ip[0]; - fptype s2 = f[0] * ip[1]; - fptype s3 = f[0] * ip[2]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1 += 3; - ip2 -= 3; - s1 += f[i] * (ip1[0] + ip2[0]); - s2 += f[i] * (ip1[1] + ip2[1]); - s3 += f[i] * (ip1[2] + ip2[2]); - } - - Dst[0] = s1; - Dst[1] = s2; - Dst[2] = s3; - Dst += DstIncr; - ip += ipstep; - l--; - } - } else if (ElCount == 2) { - while (l > 0) { - fptype s1 = f[0] * ip[0]; - fptype s2 = f[0] * ip[1]; - ip1 = ip; - ip2 = ip; - - for (i = 1; i < flen; i++) { - ip1 += 2; - ip2 -= 2; - s1 += f[i] * (ip1[0] 
+ ip2[0]); - s2 += f[i] * (ip1[1] + ip2[1]); - } - - Dst[0] = s1; - Dst[1] = s2; - Dst += DstIncr; - ip += ipstep; - l--; - } - } - } - - /** - * Function performs resizing of a single scanline. This function does - * not "know" about the length of the source scanline buffer. This buffer - * should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is - * always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 ) - * does not exceed source scanline's buffer length. SrcLine's increment is - * assumed to be equal to ElCount. - * - * @param SrcLine Source scanline buffer. - * @param DstLine Destination (resized) scanline buffer. - * @param DstLineIncr Destination scanline position increment, used for - * horizontal or vertical scanline stepping. - * @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be - * aligned by fpclass :: fpalign. - */ - - void doResize(const fptype* SrcLine, fptype* DstLine, const int DstLineIncr, - fptype* const) const { - const int IntFltLen = FltBank->getFilterLen(); - const int ElCount = Vars->ElCount; - const typename CImageResizerFilterStep::CResizePos* - rpos = &(*RPosBuf)[0]; - - const typename CImageResizerFilterStep< - fptype, fptypeatom>::CResizePos* const rpose = rpos + OutLen; - -#define AVIR_RESIZE_PART1 \ - while (rpos < rpose) { \ - const fptype x = (fptype)rpos->x; \ - const fptype* const ftp = rpos->ftp; \ - const fptype* const ftp2 = ftp + IntFltLen; \ - const fptype* Src = SrcLine + rpos->SrcOffs; \ - int i; - -#define AVIR_RESIZE_PART1nx \ - while (rpos < rpose) { \ - const fptype* const ftp = rpos->ftp; \ - const fptype* Src = SrcLine + rpos->SrcOffs; \ - int i; - -#define AVIR_RESIZE_PART2 \ - DstLine += DstLineIncr; \ - rpos++; \ - } - - if (FltBank->getOrder() == 1) { - if (ElCount == 1) { - AVIR_RESIZE_PART1 - - fptype sum = 0.0; - - for (i = 0; i < IntFltLen; i++) { - sum += (ftp[i] + ftp2[i] * x) * Src[i]; - } - - DstLine[0] = sum; - - AVIR_RESIZE_PART2 - } else if (ElCount == 
4) { - AVIR_RESIZE_PART1 - - fptype sum[4]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - sum[3] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i] + ftp2[i] * x; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; - sum[3] += xx * Src[3]; - Src += 4; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - DstLine[3] = sum[3]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 3) { - AVIR_RESIZE_PART1 - - fptype sum[3]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i] + ftp2[i] * x; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; - Src += 3; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 2) { - AVIR_RESIZE_PART1 - - fptype sum[2]; - sum[0] = 0.0; - sum[1] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i] + ftp2[i] * x; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - Src += 2; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - - AVIR_RESIZE_PART2 - } - } else { - if (ElCount == 1) { - AVIR_RESIZE_PART1nx - - fptype sum = 0.0; - - for (i = 0; i < IntFltLen; i++) { - sum += ftp[i] * Src[i]; - } - - DstLine[0] = sum; - - AVIR_RESIZE_PART2 - } else if (ElCount == 4) { - AVIR_RESIZE_PART1nx - - fptype sum[4]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - sum[3] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i]; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; - sum[3] += xx * Src[3]; - Src += 4; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - DstLine[3] = sum[3]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 3) { - AVIR_RESIZE_PART1nx - - fptype sum[3]; - sum[0] = 0.0; - sum[1] = 0.0; - sum[2] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i]; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - sum[2] += xx * Src[2]; 
- Src += 3; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - DstLine[2] = sum[2]; - - AVIR_RESIZE_PART2 - } else if (ElCount == 2) { - AVIR_RESIZE_PART1nx - - fptype sum[2]; - sum[0] = 0.0; - sum[1] = 0.0; - - for (i = 0; i < IntFltLen; i++) { - const fptype xx = ftp[i]; - sum[0] += xx * Src[0]; - sum[1] += xx * Src[1]; - Src += 2; - } - - DstLine[0] = sum[0]; - DstLine[1] = sum[1]; - - AVIR_RESIZE_PART2 - } - } - } -#undef AVIR_RESIZE_PART2 -#undef AVIR_RESIZE_PART1nx -#undef AVIR_RESIZE_PART1 -}; - -/** - * @brief Image resizer's default dithering class. - * - * This class defines an object that performs rounding, clipping and dithering - * operations over horizontal scanline pixels before scanline is stored in the - * output buffer. - * - * The ditherer should expect the same storage order of the pixels in a - * scanline as used in the "filtering step" class. So, a separate ditherer - * class should be defined for each scanline pixel storage style. The default - * ditherer implements a simple rounding without dithering: it can be used for - * an efficient dithering method which can be multi-threaded. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types can be used. - */ - -template -class CImageResizerDithererDefINL { - public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. - */ - - void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul, - const double aPkOut) { - Len = aLen; - Vars = &aVars; - LenE = aLen * Vars->ElCount; - TrMul0 = aTrMul; - PkOut0 = aPkOut; - } - - /** - * @return "True" if dithering is recursive relative to scanlines meaning - * multi-threaded execution is not supported by this dithering method. 
- */ - - static bool isRecursive() { return (false); } - - /** - * Function performs rounding and clipping operations. - * - * @param ResScanline The buffer containing the final scanline. - */ - - void dither(fptype* const ResScanline) const { - const fptype c0 = 0.0; - const fptype PkOut = (fptype)PkOut0; - int j; - - if (TrMul0 == 1.0) { - // Optimization - do not perform bit depth truncation. - - for (j = 0; j < LenE; j++) { - ResScanline[j] = clamp(round(ResScanline[j]), c0, PkOut); - } - } else { - const fptype TrMul = (fptype)TrMul0; - - for (j = 0; j < LenE; j++) { - const fptype z0 = round(ResScanline[j] / TrMul) * TrMul; - ResScanline[j] = clamp(z0, c0, PkOut); - } - } - } - - protected: - int Len; ///< Scanline's length in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int LenE; ///< = LenE * ElCount. - ///< - double TrMul0; ///< Bit-depth truncation multiplier. - ///< - double PkOut0; ///< Peak output value allowed. - ///< -}; - -/** - * @brief Image resizer's error-diffusion dithering class, interleaved mode. - * - * This ditherer implements error-diffusion dithering which looks good, and - * whose results are compressed by PNG well. This implementation uses - * weighting coefficients obtained via machine optimization and visual - * evaluation. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types can be used. - */ - -template -class CImageResizerDithererErrdINL - : public CImageResizerDithererDefINL { - public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. 
- */ - - void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul, - const double aPkOut) { - CImageResizerDithererDefINL::init(aLen, aVars, aTrMul, aPkOut); - - ResScanlineDith0.alloc(LenE + Vars->ElCount, sizeof(fptype)); - ResScanlineDith = ResScanlineDith0 + Vars->ElCount; - int i; - - for (i = 0; i < LenE + Vars->ElCount; i++) { - ResScanlineDith0[i] = 0.0; - } - } - - static bool isRecursive() { return (true); } - - void dither(fptype* const ResScanline) { - const int ElCount = Vars->ElCount; - const fptype c0 = 0.0; - const fptype TrMul = (fptype)TrMul0; - const fptype PkOut = (fptype)PkOut0; - int j; - - for (j = 0; j < LenE; j++) { - ResScanline[j] += ResScanlineDith[j]; - ResScanlineDith[j] = 0.0; - } - - for (j = 0; j < LenE - ElCount; j++) { - // Perform rounding, noise estimation and saturation. - - const fptype z0 = round(ResScanline[j] / TrMul) * TrMul; - const fptype Noise = ResScanline[j] - z0; - ResScanline[j] = clamp(z0, c0, PkOut); - - ResScanline[j + ElCount] += Noise * (fptype)0.364842; - ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305; - ResScanlineDith[j] += Noise * (fptype)0.364842; - ResScanlineDith[j + ElCount] += Noise * (fptype)0.063011; - } - - while (j < LenE) { - const fptype z0 = round(ResScanline[j] / TrMul) * TrMul; - const fptype Noise = ResScanline[j] - z0; - ResScanline[j] = clamp(z0, c0, PkOut); - - ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305; - ResScanlineDith[j] += Noise * (fptype)0.364842; - j++; - } - } - - protected: - using CImageResizerDithererDefINL::Len; - using CImageResizerDithererDefINL::Vars; - using CImageResizerDithererDefINL::LenE; - using CImageResizerDithererDefINL::TrMul0; - using CImageResizerDithererDefINL::PkOut0; - - CBuffer ResScanlineDith0; ///< Error diffusion buffer. - ///< - fptype* ResScanlineDith; ///< Error diffusion buffer pointer which skips - ///< the first ElCount elements. 
- ///< -}; - -/** - * @brief Floating-point processing definition and abstraction class. - * - * This class defines several constants and typedefs that point to classes - * that should be used by the image resizing algorithm. Such "definition - * class" can be used to define alternative scanline processing algorithms - * (e.g. SIMD) and image scanline packing styles used during processing. This - * class also offers an abstraction layer for dithering, rounding and - * clamping (saturation) operation. - * - * The fpclass_def class can be used to define processing using both SIMD and - * non-SIMD types, but using algorithms that are operate on interleaved pixels - * and non-SIMD optimized themselves. - * - * @tparam afptype Floating point type to use for storing intermediate data - * and variables. For variables that are not used in intensive calculations - * the "double" type is always used. On the latest Intel processors (like - * i7-4770K) there is almost no performance difference between "double" and - * "float". Image quality differences between "double" and "float" are not - * apparent on 8-bit images. At the same time the "float" uses half amount of - * working memory the "double" type uses. SIMD types can be used. The - * functions round() and clamp() in the "avir" or other visible namespace - * should be available for the specified type. SIMD types allow to perform - * resizing of images with more than 4 channels, to be exact 4 * SIMD element - * number (e.g. 16 for float4), without modification of the image resizing - * algorithm required. - * @tparam afptypeatom The atomic type the "afptype" consists of. - * @tparam adith Ditherer class to use during processing. - */ - -template > -class fpclass_def { - public: - typedef afptype fptype; ///< Floating-point type to use during processing. - ///< - typedef afptypeatom fptypeatom; ///< Atomic type "fptype" consists of. 
- ///< - static const int fppack = - sizeof(fptype) / - sizeof(fptypeatom); ///< - ///< The number of atomic types stored in a single - ///< "fptype" element. - ///< - static const int fpalign = - sizeof(fptype); ///< Suggested alignment size - ///< in bytes. This is not a required alignment, because - ///< image resizing algorithm cannot be made to have a - ///< strictly aligned data access at all steps (e.g. - ///< interpolation cannot perform aligned accesses). - ///< - static const int elalign = - 1; ///< Length alignment of arrays of elements. - ///< This applies to filters and intermediate buffers: this constant - ///< forces filters and scanlines to have a length which is a multiple - ///< of this value, for more efficient SIMD implementation. - ///< - static const int packmode = 0; ///< 0 if interleaved packing, 1 if - ///< de-interleaved. - ///< - typedef CImageResizerFilterStepINL - CFilterStep; ///< - ///< Filtering step class to use during processing. - ///< - typedef adith CDitherer; ///< Ditherer class to use during processing. - ///< -}; - -/** - * @brief Image resizer class. - * - * The object of this class can be used to resize 1-4 channel images to any - * required size. Resizing is performed by utilizing interpolated sinc - * fractional delay filters plus (if necessary) a cascade of built-in - * sinc function-based 2X upsampling or 2X downsampling stages, followed by a - * correction filtering. - * - * Object of this class can be allocated on stack. - * - * @tparam fpclass Floating-point processing definition class to use. See - * avir::fpclass_def for more details. - */ - -template > -class CImageResizer { - public: - /** - * Constructor initializes the resizer. - * - * @param aResBitDepth Required bit depth of resulting image (1-16). If - * integer value output is used (e.g. 
uint8_t), the bit depth also affects - * rounding: for example, if aResBitDepth=6 and "Tout" is uint8_t, the - * result will be rounded to 6 most significant bits (2 least significant - * bits truncated, with dithering applied). - * @param aSrcBitDepth Source image's real bit-depth. Set to 0 to use - * aResBitDepth. - * @param aParams Resizing algorithm's parameters to use. Leave out for - * default values. Can be useful when performing automatic optimization of - * parameters. - */ - - CImageResizer(const int aResBitDepth = 8, const int aSrcBitDepth = 0, - const CImageResizerParams& aParams = CImageResizerParamsDef()) - : Params(aParams), ResBitDepth(aResBitDepth) { - SrcBitDepth = (aSrcBitDepth == 0 ? ResBitDepth : aSrcBitDepth); - - initFilterBank(FixedFilterBank, 1.0, false, CFltBuffer()); - FixedFilterBank.createAllFilters(); - } - - /** - * Function resizes image. - * - * @param SrcBuf Source image buffer. - * @param SrcWidth Source image width. - * @param SrcHeight Source image height. - * @param SrcScanlineSize Physical size of source scanline in elements - * (not bytes). If this value is below 1, SrcWidth * ElCountIO will be - * used as the physical source scanline size. - * @param[out] NewBuf Buffer to accept the resized image. Can be equal to - * SrcBuf if the size of the resized image is smaller or equal to source - * image in size. - * @param NewWidth New image width. - * @param NewHeight New image height. - * @param ElCountIO The number of elements (channels) used to store each - * source and destination pixel (1-4). - * @param k Resizing step (one output pixel corresponds to "k" input - * pixels). A downsizing factor if > 1.0; upsizing factor if <= 1.0. - * Multiply by -1 if you would like to bypass "ox" and "oy" adjustment - * which is done by default to produce a centered image. If step value - * equals 0, the step value will be chosen automatically and independently - * for horizontal and vertical resizing. 
- * @param[in,out] aVars Pointer to variables structure to be passed to the - * image resizing function. Can be NULL. Only variables that are - * initialized in default constructor of this structure are accepted by - * this function. These variables will not be changed by this function. - * All other variables can be modified by this function. The access to - * this object is not thread-safe, each concurrent instance of this - * function should use a separate aVars object. - * @tparam Tin Input buffer element's type. Can be uint8_t (0-255 value - * range), uint16_t (0-65535 value range), float (0.0-1.0 value range), - * double (0.0-1.0 value range). Larger integer types are treated as - * uint16_t. Signed integer types are unsupported. - * @tparam Tout Output buffer element's type. Can be uint8_t (0-255 value - * range), uint16_t (0-65535 value range), float (0.0-1.0 value range), - * double (0.0-1.0 value range). Larger integer types are treated as - * uint16_t. Signed integer types are unsupported. - */ - - template - void resizeImage(const Tin* const SrcBuf, const int SrcWidth, - const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf, - const int NewWidth, const int NewHeight, const int ElCountIO, - const double k, - CImageResizerVars* const aVars = NULL) const { - if (SrcWidth == 0 || SrcHeight == 0) { - memset(NewBuf, 0, (size_t)NewWidth * NewHeight * sizeof(Tout)); - - return; - } else if (NewWidth == 0 || NewHeight == 0) { - return; - } - - CImageResizerVars DefVars; - CImageResizerVars& Vars = (aVars == NULL ? DefVars : *aVars); - - CImageResizerThreadPool DefThreadPool; - CImageResizerThreadPool& ThreadPool = - (Vars.ThreadPool == NULL ? DefThreadPool : *Vars.ThreadPool); - - // Define resizing steps, also optionally modify offsets so that - // resizing produces a "centered" image. 
- - double kx; - double ky; - double ox = Vars.ox; - double oy = Vars.oy; - - if (k == 0.0) { - if (NewWidth > SrcWidth) { - kx = (double)(SrcWidth - 1) / (NewWidth - 1); - } else { - kx = (double)SrcWidth / NewWidth; - ox += (kx - 1.0) * 0.5; - } - - if (NewHeight > SrcHeight) { - ky = (double)(SrcHeight - 1) / (NewHeight - 1); - } else { - ky = (double)SrcHeight / NewHeight; - oy += (ky - 1.0) * 0.5; - } - } else if (k > 0.0) { - kx = k; - ky = k; - - if (k > 1.0) { - const double ko = (k - 1.0) * 0.5; - ox += ko; - oy += ko; - } - } else { - kx = -k; - ky = -k; - } - - // Evaluate pre-multipliers used on the output stage. - - const bool IsInFloat = ((Tin)0.4 != 0); - const bool IsOutFloat = ((Tout)0.4 != 0); - double OutMul; // Output multiplier. - - if (Vars.UseSRGBGamma) { - if (IsInFloat) { - Vars.InGammaMult = 1.0; - } else { - Vars.InGammaMult = 1.0 / (sizeof(Tin) == 1 ? 255.0 : 65535.0); - } - - if (IsOutFloat) { - Vars.OutGammaMult = 1.0; - } else { - Vars.OutGammaMult = (sizeof(Tout) == 1 ? 255.0 : 65535.0); - } - - OutMul = 1.0; - } else { - if (IsOutFloat) { - OutMul = 1.0; - } else { - OutMul = (sizeof(Tout) == 1 ? 255.0 : 65535.0); - } - - if (!IsInFloat) { - OutMul /= (sizeof(Tin) == 1 ? 255.0 : 65535.0); - } - } - - // Fill widely-used variables. - - const int ElCount = (ElCountIO + fpclass ::fppack - 1) / fpclass ::fppack; - - const int NewWidthE = NewWidth * ElCount; - - if (SrcScanlineSize < 1) { - SrcScanlineSize = SrcWidth * ElCountIO; - } - - Vars.ElCount = ElCount; - Vars.ElCountIO = ElCountIO; - Vars.fppack = fpclass ::fppack; - Vars.fpalign = fpclass ::fpalign; - Vars.elalign = fpclass ::elalign; - Vars.packmode = fpclass ::packmode; - - // Horizontal scanline filtering and resizing. 
- - CDSPFracFilterBankLin FltBank; - CFilterSteps FltSteps; - typename CFilterStep ::CRPosBufArray RPosBufArray; - CBuffer UsedFracMap; - - // Perform the filtering steps modeling at various modes, find the - // most efficient mode for both horizontal and vertical resizing. - - int UseBuildMode = 1; - const int BuildModeCount = (FixedFilterBank.getOrder() == 0 ? 4 : 2); - - int m; - - if (Vars.BuildMode >= 0) { - UseBuildMode = Vars.BuildMode; - } else { - int BestScore = 0x7FFFFFFF; - - for (m = 0; m < BuildModeCount; m++) { - CDSPFracFilterBankLin TmpBank; - CFilterSteps TmpSteps; - Vars.k = kx; - Vars.o = ox; - buildFilterSteps(TmpSteps, Vars, TmpBank, OutMul, m, true); - updateFilterStepBuffers(TmpSteps, Vars, RPosBufArray, SrcWidth, - NewWidth); - - fillUsedFracMap(TmpSteps[Vars.ResizeStep], UsedFracMap); - const int c = calcComplexity(TmpSteps, Vars, UsedFracMap, SrcHeight); - - if (c < BestScore) { - UseBuildMode = m; - BestScore = c; - } - } - } - - // Perform the actual filtering steps building. - - Vars.k = kx; - Vars.o = ox; - buildFilterSteps(FltSteps, Vars, FltBank, OutMul, UseBuildMode, false); - - updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcWidth, NewWidth); - - updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth); - - const int ThreadCount = ThreadPool.getSuggestedWorkloadCount(); - // Includes the current thread. - - CStructArray > td; - td.setItemCount(ThreadCount); - int i; - - for (i = 0; i < ThreadCount; i++) { - if (i > 0) { - ThreadPool.addWorkload(&td[i]); - } - - td[i].init(i, ThreadCount, FltSteps, Vars); - - td[i].initScanlineQueue(td[i].sopResizeH, SrcHeight, SrcWidth); - } - - CBuffer FltBuf( - (size_t)NewWidthE * SrcHeight, - fpclass ::fpalign); // Temporary buffer that receives - // horizontally-filtered and resized image. 
- - for (i = 0; i < SrcHeight; i++) { - td[i % ThreadCount].addScanlineToQueue( - (void*)&SrcBuf[(size_t)i * SrcScanlineSize], - &FltBuf[(size_t)i * NewWidthE]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - - // Vertical scanline filtering and resizing, reuse previously defined - // filtering steps if possible. - - const int PrevUseBuildMode = UseBuildMode; - - if (Vars.BuildMode >= 0) { - UseBuildMode = Vars.BuildMode; - } else { - CImageResizerVars TmpVars(Vars); - int BestScore = 0x7FFFFFFF; - - for (m = 0; m < BuildModeCount; m++) { - CDSPFracFilterBankLin TmpBank; - TmpBank.copyInitParams(FltBank); - CFilterSteps TmpSteps; - TmpVars.k = ky; - TmpVars.o = oy; - buildFilterSteps(TmpSteps, TmpVars, TmpBank, 1.0, m, true); - updateFilterStepBuffers(TmpSteps, TmpVars, RPosBufArray, SrcHeight, - NewHeight); - - fillUsedFracMap(TmpSteps[TmpVars.ResizeStep], UsedFracMap); - - const int c = calcComplexity(TmpSteps, TmpVars, UsedFracMap, NewWidth); - - if (c < BestScore) { - UseBuildMode = m; - BestScore = c; - } - } - } - - Vars.k = ky; - Vars.o = oy; - - if (UseBuildMode == PrevUseBuildMode && ky == kx) { - if (OutMul != 1.0) { - modifyCorrFilterDCGain(FltSteps, 1.0 / OutMul); - } - } else { - buildFilterSteps(FltSteps, Vars, FltBank, 1.0, UseBuildMode, false); - } - - updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcHeight, NewHeight); - - updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth); - - if (IsOutFloat && sizeof(FltBuf[0]) == sizeof(Tout) && - fpclass ::packmode == 0) { - // In-place output. 
- - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight, - NewWidthE, NewWidthE); - } - - for (i = 0; i < NewWidth; i++) { - td[i % ThreadCount].addScanlineToQueue( - &FltBuf[(size_t)i * ElCount], - (fptype*)&NewBuf[(size_t)i * ElCount]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - ThreadPool.removeAllWorkloads(); - - return; - } - - CBuffer ResBuf((size_t)NewWidthE * NewHeight, - fpclass ::fpalign); - - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight, NewWidthE, - NewWidthE); - } - - const int im = (fpclass ::packmode == 0 ? ElCount : 1); - - for (i = 0; i < NewWidth; i++) { - td[i % ThreadCount].addScanlineToQueue(&FltBuf[(size_t)i * im], - &ResBuf[(size_t)i * im]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - - if (IsOutFloat) { - // Perform output, but skip dithering. - - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopUnpackH, NewHeight, NewWidth); - } - - for (i = 0; i < NewHeight; i++) { - td[i % ThreadCount].addScanlineToQueue( - &ResBuf[(size_t)i * NewWidthE], - &NewBuf[(size_t)i * NewWidth * ElCountIO]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - ThreadPool.removeAllWorkloads(); - - return; - } - - // Perform output with dithering (for integer output only). - - int TruncBits; // The number of lower bits to truncate and dither. - int OutRange; // Output range. - - if (sizeof(Tout) == 1) { - TruncBits = 8 - ResBitDepth; - OutRange = 255; - } else { - TruncBits = 16 - ResBitDepth; - OutRange = 65535; - } - - const double PkOut = OutRange; - const double TrMul = - (TruncBits > 0 ? 
PkOut / (OutRange >> TruncBits) : 1.0); - - if (CDitherer ::isRecursive()) { - td[0].getDitherer().init(NewWidth, Vars, TrMul, PkOut); - - if (Vars.UseSRGBGamma) { - for (i = 0; i < NewHeight; i++) { - fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE]; - - CFilterStep ::applySRGBGamma(ResScanline, NewWidth, Vars); - - td[0].getDitherer().dither(ResScanline); - - CFilterStep ::unpackScanline( - ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth, - Vars); - } - } else { - for (i = 0; i < NewHeight; i++) { - fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE]; - - td[0].getDitherer().dither(ResScanline); - - CFilterStep ::unpackScanline( - ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth, - Vars); - } - } - } else { - for (i = 0; i < ThreadCount; i++) { - td[i].initScanlineQueue(td[i].sopDitherAndUnpackH, NewHeight, NewWidth); - - td[i].getDitherer().init(NewWidth, Vars, TrMul, PkOut); - } - - for (i = 0; i < NewHeight; i++) { - td[i % ThreadCount].addScanlineToQueue( - &ResBuf[(size_t)i * NewWidthE], - &NewBuf[(size_t)i * NewWidth * ElCountIO]); - } - - ThreadPool.startAllWorkloads(); - td[0].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - } - - ThreadPool.removeAllWorkloads(); - } - - private: - typedef typename fpclass ::fptype fptype; ///< Floating-point type to use - ///< during processing. - ///< - typedef typename fpclass ::CFilterStep - CFilterStep; ///< Filtering step - ///< class to use during processing. - ///< - typedef typename fpclass ::CDitherer CDitherer; ///< Ditherer class to - ///< use during processing. - ///< - CImageResizerParams Params; ///< Algorithm's parameters currently in use. - ///< - int SrcBitDepth; ///< Bit resolution of the source image. - ///< - int ResBitDepth; ///< Bit resolution of the resulting image. - ///< - CDSPFracFilterBankLin - FixedFilterBank; ///< Fractional delay - ///< filter bank with fixed characteristics, mainly for - ///< upsizing cases. 
- ///< - - /** - * @brief Filtering steps array. - * - * The object of this class stores filtering steps together. - */ - - typedef CStructArray CFilterSteps; - - /** - * Function initializes the filter bank in the specified resizing step - * according to the source and resulting image bit depths. - * - * @param FltBank Filter bank to initialize. - * @param CutoffMult Cutoff multiplier, 0 to 1. 1 corresponds to 0.5pi - * cutoff point. - * @param ForceHiOrder "True" if a high-order interpolation should be - * forced which requires considerably less resources for initialization. - * @param ExtFilter External filter to apply to interpolation filter. - */ - - void initFilterBank(CDSPFracFilterBankLin& FltBank, - const double CutoffMult, const bool ForceHiOrder, - const CFltBuffer& ExtFilter) const { - const int IntBitDepth = - (ResBitDepth > SrcBitDepth ? ResBitDepth : SrcBitDepth); - - const double SNR = -6.02 * (IntBitDepth + 3); - int UseOrder; - int FracCount; // The number of fractional delay filters sampled by - // the filter bank. This variable affects the - // signal-to-noise ratio at interpolation stage. - // Theoretically, at UseOrder==1, 8-bit image resizing - // requires 66.2 dB SNR or 11. 16-bit resizing requires - // 114.4 dB SNR or 150. At UseOrder=0 the required number of - // filters is exponentially higher. - - if (ForceHiOrder || IntBitDepth > 8) { - UseOrder = 1; // -146 dB max - FracCount = (int)ceil(0.23134052 * exp(-0.058062929 * SNR)); - } else { - UseOrder = 0; // -72 dB max - FracCount = (int)ceil(0.33287686 * exp(-0.11334583 * SNR)); - } - - if (FracCount < 2) { - FracCount = 2; - } - - FltBank.init(FracCount, UseOrder, Params.IntFltLen / CutoffMult, - Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha, - ExtFilter, fpclass ::fpalign, fpclass ::elalign); - } - - /** - * Function allocates filter buffer taking "fpclass" alignments into - * account. 
The allocated buffer may be larger than the requested size: in - * this case the additional elements will be zeroed by this function. - * - * @param Flt Filter buffer. - * @param ReqCapacity The required filter buffer's capacity. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter allocation. - * @param FltExt If non-NULL this variable will receive the number of - * elements the filter was extended by. - */ - - static void allocFilter(CBuffer& Flt, const int ReqCapacity, - const bool IsModel = false, - int* const FltExt = NULL) { - int UseCapacity = - (ReqCapacity + fpclass ::elalign - 1) & ~(fpclass ::elalign - 1); - - int Ext = UseCapacity - ReqCapacity; - - if (FltExt != NULL) { - *FltExt = Ext; - } - - if (IsModel) { - Flt.forceCapacity(UseCapacity); - return; - } - - Flt.alloc(UseCapacity, fpclass ::fpalign); - - while (Ext > 0) { - Ext--; - Flt[ReqCapacity + Ext] = 0.0; - } - } - - /** - * Function assigns filter parameters to the specified filtering step - * object. - * - * @param fs Filtering step to assign parameter to. This step cannot be - * the last step if ResampleFactor greater than 1 was specified. - * @param IsUpsample "True" if upsampling step. Should be set to "false" - * if FltCutoff is negative. - * @param ResampleFactor Resampling factor of this filter (>=1). - * @param FltCutoff Filter cutoff point. This value will be divided by the - * ResampleFactor if IsUpsample equals "true". If zero value was - * specified, the "half-band" predefined filter will be created. In this - * case the ResampleFactor will modify the filter cutoff point. - * @param DCGain DC gain to apply to the filter. Assigned to filtering - * step's DCGain variable. - * @param UseFltOrig "True" if the originally-designed filter should be - * left in filtering step's FltOrig buffer. Otherwise it will be freed. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. 
- */ - - void assignFilterParams(CFilterStep& fs, const bool IsUpsample, - const int ResampleFactor, const double FltCutoff, - const double DCGain, const bool UseFltOrig, - const bool IsModel) const { - double FltAlpha; - double Len2; - double Freq; - - if (FltCutoff == 0.0) { - const double m = 2.0 / ResampleFactor; - FltAlpha = Params.HBFltAlpha; - Len2 = 0.5 * Params.HBFltLen / m; - Freq = AVIR_PI * Params.HBFltCutoff * m; - } else { - FltAlpha = Params.LPFltAlpha; - Len2 = 0.25 * Params.LPFltBaseLen / FltCutoff; - Freq = AVIR_PI * Params.LPFltCutoffMult * FltCutoff; - } - - if (IsUpsample) { - Len2 *= ResampleFactor; - Freq /= ResampleFactor; - fs.DCGain = DCGain * ResampleFactor; - } else { - fs.DCGain = DCGain; - } - - fs.FltOrig.Len2 = Len2; - fs.FltOrig.Freq = Freq; - fs.FltOrig.Alpha = FltAlpha; - fs.FltOrig.DCGain = fs.DCGain; - - CDSPPeakedCosineLPF w(Len2, Freq, FltAlpha); - - fs.IsUpsample = IsUpsample; - fs.ResampleFactor = ResampleFactor; - fs.FltLatency = w.fl2; - - int FltExt; // Filter's extension due to fpclass :: elalign. - - if (IsModel) { - allocFilter(fs.Flt, w.FilterLen, true, &FltExt); - - if (UseFltOrig) { - // Allocate a real buffer even in modeling mode since this - // filter may be copied by the filter bank. 
- - fs.FltOrig.alloc(w.FilterLen); - memset(&fs.FltOrig[0], 0, w.FilterLen * sizeof(fs.FltOrig[0])); - } - } else { - fs.FltOrig.alloc(w.FilterLen); - - w.generateLPF(&fs.FltOrig[0], 1.0); - optimizeFIRFilter(fs.FltOrig, fs.FltLatency); - normalizeFIRFilter(&fs.FltOrig[0], fs.FltOrig.getCapacity(), fs.DCGain); - - allocFilter(fs.Flt, fs.FltOrig.getCapacity(), false, &FltExt); - copyArray(&fs.FltOrig[0], &fs.Flt[0], fs.FltOrig.getCapacity()); - - if (!UseFltOrig) { - fs.FltOrig.free(); - } - } - - if (IsUpsample) { - int l = fs.Flt.getCapacity() - fs.FltLatency - ResampleFactor - FltExt; - - allocFilter(fs.PrefixDC, l, IsModel); - allocFilter(fs.SuffixDC, fs.FltLatency, IsModel); - - if (IsModel) { - return; - } - - // Create prefix and suffix "tails" used during upsampling. - - const fptype* ip = &fs.Flt[fs.FltLatency + ResampleFactor]; - copyArray(ip, &fs.PrefixDC[0], l); - - while (true) { - ip += ResampleFactor; - l -= ResampleFactor; - - if (l <= 0) { - break; - } - - addArray(ip, &fs.PrefixDC[0], l); - } - - l = fs.FltLatency; - fptype* op = &fs.SuffixDC[0]; - copyArray(&fs.Flt[0], op, l); - - while (true) { - op += ResampleFactor; - l -= ResampleFactor; - - if (l <= 0) { - break; - } - - addArray(&fs.Flt[0], op, l); - } - } else if (!UseFltOrig) { - fs.EdgePixelCount = fs.EdgePixelCountDef; - } - } - - /** - * Function adds a correction filter that tries to achieve a linear - * frequency response at all frequencies. The actual resulting response - * may feature a slight damping of the highest frequencies since a - * suitably short correction filter cannot fix steep high-frequency - * damping. - * - * This function assumes that the resizing step is currently the last - * step, even if it was not inserted yet: this allows placement of the - * correction filter both before and after the resizing step. - * - * @param Steps Filtering steps. - * @param bw Resulting bandwidth relative to the original bandwidth (which - * is 1.0), usually 1/k. Should be <= 1.0. 
- * @param IsPreCorrection "True" if the filtering step was already created - * and it is first in the Steps array. "True" also adds edge pixels to - * reduce edge artifacts. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - void addCorrectionFilter(CFilterSteps& Steps, const double bw, - const bool IsPreCorrection, - const bool IsModel) const { - CFilterStep& fs = (IsPreCorrection ? Steps[0] : Steps.add()); - fs.IsUpsample = false; - fs.ResampleFactor = 1; - fs.DCGain = 1.0; - fs.EdgePixelCount = (IsPreCorrection ? fs.EdgePixelCountDef : 0); - - if (IsModel) { - allocFilter( - fs.Flt, - CDSPFIREQ ::calcFilterLength(Params.CorrFltLen, fs.FltLatency), true); - - return; - } - - const int BinCount = 65; // Frequency response bins to control. - const int BinCount1 = BinCount - 1; - double curbw = 1.0; // Bandwidth of the filter at the current step. - int i; - int j; - double re; - double im; - - CBuffer Bins(BinCount); // Adjustment introduced by all - // steps at all frequencies of interest. - - for (j = 0; j < BinCount; j++) { - Bins[j] = 1.0; - } - - const int si = (IsPreCorrection ? 1 : 0); - - for (i = si; i < Steps.getItemCount() - (si ^ 1); i++) { - const CFilterStep& fs = Steps[i]; - - if (fs.IsUpsample) { - curbw *= fs.ResampleFactor; - - if (fs.FltOrig.getCapacity() > 0) { - continue; - } - } - - const double dcg = 1.0 / fs.DCGain; // DC gain correction. - const fptype* Flt; - int FltLen; - - if (fs.ResampleFactor == 0) { - Flt = fs.FltBank->getFilter(0); - FltLen = fs.FltBank->getFilterLen(); - } else { - Flt = &fs.Flt[0]; - FltLen = fs.Flt.getCapacity(); - } - - // Calculate frequency response adjustment introduced by the - // filter at this step, within the bounds of bandwidth of - // interest. 
- - for (j = 0; j < BinCount; j++) { - const double th = AVIR_PI * bw / curbw * j / BinCount1; - - calcFIRFilterResponse(Flt, FltLen, th, re, im); - - Bins[j] /= sqrt(re * re + im * im) * dcg; - } - - if (!fs.IsUpsample && fs.ResampleFactor > 1) { - curbw /= fs.ResampleFactor; - } - } - - // Calculate filter. - - CDSPFIREQ EQ; - EQ.init(bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false, - Params.CorrFltAlpha); - - fs.FltLatency = EQ.getFilterLatency(); - - CBuffer Filter(EQ.getFilterLength()); - EQ.buildFilter(Bins, &Filter[0]); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - optimizeFIRFilter(Filter, fs.FltLatency); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - - allocFilter(fs.Flt, Filter.getCapacity()); - copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity()); - - // Print a theoretically achieved final frequency response at various - // feature sizes (from DC to 1 pixel). Values above 255 means features - // become brighter, values below 255 means features become dimmer. - - /* const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 ); - - for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * sbw * j / BinCount1; - - calcFIRFilterResponse( &fs.Flt[ 0 ], - fs.Flt.getCapacity(), th, re, im ); - - printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j - ] * 255 ); - } - - printf( "***\n" );*/ - } - - /** - * Function adds a sharpening filter if image is being upsized. Such - * sharpening allows to spot interpolation filter's stop-band attenuation: - * if attenuation is too weak, a "dark grid" and other artifacts may - * become visible. - * - * It is assumed that 40 decibel stop-band attenuation should be - * considered a required minimum: this allows application of (deliberately - * strong) 64X sharpening without spotting any artifacts. - * - * @param Steps Filtering steps. - * @param bw Resulting bandwidth relative to the original bandwidth (which - * is 1.0), usually 1/k. 
- * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - static void addSharpenTest(CFilterSteps& Steps, const double bw, - const bool IsModel) { - if (bw <= 1.0) { - return; - } - - const double FltLen = 10.0 * bw; - - CFilterStep& fs = Steps.add(); - fs.IsUpsample = false; - fs.ResampleFactor = 1; - fs.DCGain = 1.0; - fs.EdgePixelCount = 0; - - if (IsModel) { - allocFilter(fs.Flt, CDSPFIREQ ::calcFilterLength(FltLen, fs.FltLatency), - true); - - return; - } - - const int BinCount = 200; - CBuffer Bins(BinCount); - int Thresh = (int)round(BinCount / bw * 1.75); - - if (Thresh > BinCount) { - Thresh = BinCount; - } - - int j; - - for (j = 0; j < Thresh; j++) { - Bins[j] = 1.0; - } - - for (j = Thresh; j < BinCount; j++) { - Bins[j] = 256.0; - } - - CDSPFIREQ EQ; - EQ.init(bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7); - - fs.FltLatency = EQ.getFilterLatency(); - - CBuffer Filter(EQ.getFilterLength()); - EQ.buildFilter(Bins, &Filter[0]); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - optimizeFIRFilter(Filter, fs.FltLatency); - normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0); - - allocFilter(fs.Flt, Filter.getCapacity()); - copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity()); - - /* for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * j / ( BinCount - 1 ); - double re; - double im; - - calcFIRFilterResponse( &fs.Flt[ 0 ], - fs.Flt.getCapacity(), th, re, im ); - - printf( "%f\n", sqrt( re * re + im * im )); - } - - printf( "***\n" );*/ - } - - /** - * Function builds sequence of filtering steps depending on the specified - * resizing coefficient. The last steps included are always the resizing - * step then (possibly) the correction step. - * - * @param Steps Array that receives filtering steps. - * @param[out] Vars Variables object. - * @param FltBank Filter bank to initialize and use. - * @param DCGain The overall DC gain to apply. 
This DC gain is applied to - * the first filtering step only (upsampling or filtering step). - * @param ModeFlags Build mode flags to use. This is a bitmap of switches - * that enable or disable certain algorithm features. - * @param IsModel "True" if filtering steps modeling is performed without - * the actual filter allocation and building. - */ - - void buildFilterSteps(CFilterSteps& Steps, CImageResizerVars& Vars, - CDSPFracFilterBankLin& FltBank, - const double DCGain, const int ModeFlags, - const bool IsModel) const { - Steps.clear(); - - const bool DoFltAndIntCombo = - ((ModeFlags & 1) != 0); // Do filter - // and interpolator combining. - const bool ForceHiOrderInt = - ((ModeFlags & 2) != 0); // Force use - // of a higher-order interpolation. - const bool UseHalfband = ((ModeFlags & 4) != 0); // Use half-band - // filter. - - const double bw = 1.0 / Vars.k; // Resulting bandwidth. - const int UpsampleFactor = ((int)floor(Vars.k) < 2 ? 2 : 1); - double IntCutoffMult; // Interpolation filter cutoff multiplier. - CFilterStep* ReuseStep; // If not NULL, resizing step should use - // this step object instead of creating a new one. - CFilterStep* ExtFltStep; // Use FltOrig of this step as the external - // filter to applied to the interpolator. - bool IsPreCorrection; // "True" if the correction filter is applied - // first. - double FltCutoff; // Cutoff frequency of the first filtering step. - double corrbw; ///< Bandwidth at the correction step. - - if (Vars.k <= 1.0) { - IsPreCorrection = true; - FltCutoff = 1.0; - corrbw = 1.0; - Steps.add(); - } else { - IsPreCorrection = false; - FltCutoff = bw; - corrbw = bw; - } - - // Add 1 upsampling or several downsampling filters. - - if (UpsampleFactor > 1) { - CFilterStep& fs = Steps.add(); - assignFilterParams(fs, true, UpsampleFactor, FltCutoff, DCGain, - DoFltAndIntCombo, IsModel); - - IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor; - ReuseStep = NULL; - ExtFltStep = (DoFltAndIntCombo ? 
&fs : NULL); - } else { - int DownsampleFactor; - - while (true) { - DownsampleFactor = (int)floor(0.5 / FltCutoff); - bool DoHBFltAdd; - - if (DownsampleFactor > 16) { - // Add half-band filter unconditionally in order to keep - // filter lengths lower for more precise frequency - // response and less edge artifacts. - - DoHBFltAdd = true; - DownsampleFactor = 16; - } else { - DoHBFltAdd = (UseHalfband && DownsampleFactor > 1); - } - - if (DoHBFltAdd) { - assignFilterParams(Steps.add(), false, DownsampleFactor, 0.0, 1.0, - false, IsModel); - - FltCutoff *= DownsampleFactor; - } else { - if (DownsampleFactor < 1) { - DownsampleFactor = 1; - } - - break; - } - } - - CFilterStep& fs = Steps.add(); - assignFilterParams(fs, false, DownsampleFactor, FltCutoff, DCGain, - DoFltAndIntCombo, IsModel); - - IntCutoffMult = FltCutoff / 0.5; - - if (DoFltAndIntCombo) { - ReuseStep = &fs; - ExtFltStep = &fs; - } else { - IntCutoffMult *= DownsampleFactor; - ReuseStep = NULL; - ExtFltStep = NULL; - } - } - - // Insert resizing and correction steps. - - CFilterStep& fs = (ReuseStep == NULL ? Steps.add() : *ReuseStep); - - Vars.ResizeStep = Steps.getItemCount() - 1; - fs.IsUpsample = false; - fs.ResampleFactor = 0; - fs.DCGain = (ExtFltStep == NULL ? 1.0 : ExtFltStep->DCGain); - - initFilterBank(FltBank, IntCutoffMult, ForceHiOrderInt, - (ExtFltStep == NULL ? fs.FltOrig : ExtFltStep->FltOrig)); - - if (FltBank == FixedFilterBank) { - fs.FltBank = (CDSPFracFilterBankLin*)&FixedFilterBank; - } else { - fs.FltBank = &FltBank; - } - - addCorrectionFilter(Steps, corrbw, IsPreCorrection, IsModel); - - // addSharpenTest( Steps, bw, IsModel ); - } - - /** - * Function extends *this upsampling step so that it produces more - * upsampled pixels that cover the prefix and suffix needs of the next - * step. After the call to this function the InPrefix and InSuffix - * variables of the next step will be set to zero. - * - * @param fs Upsampling filtering step. 
- * @param NextStep The next step structure. - */ - - static void extendUpsample(CFilterStep& fs, CFilterStep& NextStep) { - fs.InPrefix = - (NextStep.InPrefix + fs.ResampleFactor - 1) / fs.ResampleFactor; - - fs.OutPrefix += fs.InPrefix * fs.ResampleFactor; - NextStep.InPrefix = 0; - - fs.InSuffix = - (NextStep.InSuffix + fs.ResampleFactor - 1) / fs.ResampleFactor; - - fs.OutSuffix += fs.InSuffix * fs.ResampleFactor; - NextStep.InSuffix = 0; - } - - /** - * Function fills resizing step's RPosBuf array, excluding the actual - * "ftp" pointers and "SrcOffs" offsets. - * - * This array should be cleared if the resizing step or offset were - * changed. Otherwise this function only fills the elements required to - * cover resizing step's OutLen. - * - * This function is called by the updateFilterStepBuffers() function. - * - * @param fs Resizing step. - * @param Vars Variables object. - */ - - static void fillRPosBuf(CFilterStep& fs, const CImageResizerVars& Vars) { - const int PrevLen = fs.RPosBuf->getCapacity(); - - if (fs.OutLen > PrevLen) { - fs.RPosBuf->increaseCapacity(fs.OutLen); - } - - typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[PrevLen]; - const int FracCount = fs.FltBank->getFracCount(); - const double o = Vars.o; - const double k = Vars.k; - int i; - - for (i = PrevLen; i < fs.OutLen; i++) { - const double SrcPos = o + k * i; - const int SrcPosInt = (int)floor(SrcPos); - const double x = (SrcPos - SrcPosInt) * FracCount; - const int fti = (int)x; - rpos->x = (typename fpclass ::fptypeatom)(x - fti); - rpos->fti = fti; - rpos->SrcPosInt = SrcPosInt; - rpos++; - } - } - - /** - * Function updates filtering step buffer lengths depending on the - * specified source and new scanline lengths. This function should be - * called after the buildFilterSteps() function. - * - * @param Steps Array that receives filtering steps. - * @param[out] Vars Variables object, will receive buffer size and length. 
- * This function expects "k" and "o" variable values that will be - * adjusted by this function. - * @param RPosBufArray Resizing position buffers array, used to obtain - * buffer to initialize and use (will be reused if it is already fully or - * partially filled). - * @param SrcLen Source scanline's length in pixels. - * @param NewLen New scanline's length in pixels. - */ - - static void updateFilterStepBuffers( - CFilterSteps& Steps, CImageResizerVars& Vars, - typename CFilterStep ::CRPosBufArray& RPosBufArray, int SrcLen, - const int NewLen) { - int upstep = -1; - int InBuf = 0; - int i; - - for (i = 0; i < Steps.getItemCount(); i++) { - CFilterStep& fs = Steps[i]; - - fs.Vars = &Vars; - fs.InLen = SrcLen; - fs.InBuf = InBuf; - fs.OutBuf = (InBuf + 1) & 1; - - if (fs.IsUpsample) { - upstep = i; - Vars.k *= fs.ResampleFactor; - Vars.o *= fs.ResampleFactor; - fs.InPrefix = 0; - fs.InSuffix = 0; - fs.OutLen = fs.InLen * fs.ResampleFactor; - fs.OutPrefix = fs.FltLatency; - fs.OutSuffix = fs.Flt.getCapacity() - fs.FltLatency - fs.ResampleFactor; - - int l0 = fs.OutPrefix + fs.OutLen + fs.OutSuffix; - int l = fs.InLen * fs.ResampleFactor + fs.SuffixDC.getCapacity(); - - if (l > l0) { - fs.OutSuffix += l - l0; - } - - l0 = fs.OutLen + fs.OutSuffix; - - if (fs.PrefixDC.getCapacity() > l0) { - fs.OutSuffix += fs.PrefixDC.getCapacity() - l0; - } - } else if (fs.ResampleFactor == 0) { - const int FilterLenD2 = fs.FltBank->getFilterLen() / 2; - const int FilterLenD21 = FilterLenD2 - 1; - - const int ResizeLPix = (int)floor(Vars.o) - FilterLenD21; - fs.InPrefix = (ResizeLPix < 0 ? -ResizeLPix : 0); - const int ResizeRPix = - (int)floor(Vars.o + (NewLen - 1) * Vars.k) + FilterLenD2 + 1; - - fs.InSuffix = (ResizeRPix > fs.InLen ? 
ResizeRPix - fs.InLen : 0); - - fs.OutLen = NewLen; - fs.RPosBuf = &RPosBufArray.getRPosBuf(Vars.k, Vars.o, - fs.FltBank->getFracCount()); - - fillRPosBuf(fs, Vars); - } else { - Vars.k /= fs.ResampleFactor; - Vars.o /= fs.ResampleFactor; - Vars.o += fs.EdgePixelCount; - - fs.InPrefix = fs.FltLatency; - fs.InSuffix = fs.Flt.getCapacity() - fs.FltLatency - 1; - - // Additionally extend OutLen to produce more precise edge - // pixels. - - fs.OutLen = (fs.InLen + fs.ResampleFactor - 1) / fs.ResampleFactor + - fs.EdgePixelCount; - - fs.InSuffix += (fs.OutLen - 1) * fs.ResampleFactor + 1 - fs.InLen; - - fs.InPrefix += fs.EdgePixelCount * fs.ResampleFactor; - fs.OutLen += fs.EdgePixelCount; - } - - InBuf = fs.OutBuf; - SrcLen = fs.OutLen; - } - - Steps[Steps.getItemCount() - 1].OutBuf = 2; - - if (upstep != -1) { - extendUpsample(Steps[upstep], Steps[upstep + 1]); - } - } - - /** - * Function calculates an optimal intermediate buffer length that will - * cover all needs of the specified filtering steps. This function should - * be called after the updateFilterStepBuffers() function. - * - * Function also updates resizing step's RPosBuf pointers to the filter - * bank and SrcOffs values. - * - * @param Steps Filtering steps. - * @param[out] Vars Variables object, will receive buffer size and length. - * @param ResElIncr Resulting (final) element increment, used to produce - * de-interleaved result. For horizontal processing this value is equal - * to last step's OutLen, for vertical processing this value is equal to - * resulting image's width. 
- */ - - static void updateBufLenAndRPosPtrs(CFilterSteps& Steps, - CImageResizerVars& Vars, - const int ResElIncr) { - int MaxPrefix[2] = {0, 0}; - int MaxLen[2] = {0, 0}; - int i; - - for (i = 0; i < Steps.getItemCount(); i++) { - CFilterStep& fs = Steps[i]; - const int ib = fs.InBuf; - - if (fs.InPrefix > MaxPrefix[ib]) { - MaxPrefix[ib] = fs.InPrefix; - } - - int l = fs.InLen + fs.InSuffix; - - if (l > MaxLen[ib]) { - MaxLen[ib] = l; - } - - fs.InElIncr = fs.InPrefix + l; - - if (fs.OutBuf == 2) { - break; - } - - const int ob = fs.OutBuf; - - if (fs.IsUpsample) { - if (fs.OutPrefix > MaxPrefix[ob]) { - MaxPrefix[ob] = fs.OutPrefix; - } - - l = fs.OutLen + fs.OutSuffix; - - if (l > MaxLen[ob]) { - MaxLen[ob] = l; - } - } else { - if (fs.OutLen > MaxLen[ob]) { - MaxLen[ob] = fs.OutLen; - } - } - } - - // Update OutElIncr values of all steps. - - for (i = 0; i < Steps.getItemCount(); i++) { - CFilterStep& fs = Steps[i]; - - if (fs.OutBuf == 2) { - fs.OutElIncr = ResElIncr; - break; - } - - CFilterStep& fs2 = Steps[i + 1]; - - if (fs.IsUpsample) { - fs.OutElIncr = fs.OutPrefix + fs.OutLen + fs.OutSuffix; - - if (fs.OutElIncr > fs2.InElIncr) { - fs2.InElIncr = fs.OutElIncr; - } else { - fs.OutElIncr = fs2.InElIncr; - } - } else { - fs.OutElIncr = fs2.InElIncr; - } - } - - // Update temporary buffer's length. - - for (i = 0; i < 2; i++) { - Vars.BufLen[i] = MaxPrefix[i] + MaxLen[i]; - Vars.BufOffs[i] = MaxPrefix[i]; - - if (Vars.packmode == 0) { - Vars.BufOffs[i] *= Vars.ElCount; - } - - Vars.BufLen[i] *= Vars.ElCount; - } - - // Update RPosBuf pointers and SrcOffs. - - CFilterStep& fs = Steps[Vars.ResizeStep]; - typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0]; - const int em = (fpclass ::packmode == 0 ? 
Vars.ElCount : 1); - const int FilterLenD21 = fs.FltBank->getFilterLen() / 2 - 1; - - for (i = 0; i < fs.OutLen; i++) { - rpos->ftp = fs.FltBank->getFilter(rpos->fti); - rpos->SrcOffs = (rpos->SrcPosInt - FilterLenD21) * em; - rpos++; - } - } - - /** - * Function modifies the overall (DC) gain of the correction filter in the - * pre-built filtering steps array. - * - * @param Steps Filtering steps. - * @param m Multiplier to apply to the correction filter. - */ - - void modifyCorrFilterDCGain(CFilterSteps& Steps, const double m) const { - CBuffer* Flt; - const int z = Steps.getItemCount() - 1; - - if (!Steps[z].IsUpsample && Steps[z].ResampleFactor == 1) { - Flt = &Steps[z].Flt; - } else { - Flt = &Steps[0].Flt; - } - - int i; - - for (i = 0; i < Flt->getCapacity(); i++) { - (*Flt)[i] = (fptype)((double)(*Flt)[i] * m); - } - } - - /** - * Function builds a map of used fractional delay filters based on the - * resizing positions buffer. - * - * @param fs Resizing step. - * @param[out] UsedFracMap Map of used fractional delay filters. - */ - - static void fillUsedFracMap(const CFilterStep& fs, - CBuffer& UsedFracMap) { - const int FracCount = fs.FltBank->getFracCount(); - UsedFracMap.increaseCapacity(FracCount, false); - memset(&UsedFracMap[0], 0, FracCount * sizeof(UsedFracMap[0])); - - typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0]; - int i; - - for (i = 0; i < fs.OutLen; i++) { - UsedFracMap[rpos->fti] |= 1; - rpos++; - } - } - - /** - * Function calculates the overall filtering steps complexity per - * scanline. Each complexity unit corresponds to a single multiply-add - * operation. Data copy and pointer math operations are not included in - * this calculation, it is assumed that they correlate to the multiply-add - * operations. Calculation also does not include final rounding, dithering - * and clamping operations since they cannot be optimized out anyway. - * - * Calculation of the CRPosBuf buffer is not included since it cannot be - * avoided. 
- * - * This function should be called after the updateFilterStepBuffers() - * function. - * - * @param Steps Filtering steps array. - * @param Vars Variables object. - * @param UsedFracMap The map of used fractional delay filters. - * @param ScanlineCount Scanline count. - */ - - static int calcComplexity(const CFilterSteps& Steps, - const CImageResizerVars& Vars, - const CBuffer& UsedFracMap, - const int ScanlineCount) { - int fcnum; // Filter complexity multiplier numerator. - int fcdenom; // Filter complexity multiplier denominator. - - if (Vars.packmode != 0) { - fcnum = 1; - fcdenom = 1; - } else { - // In interleaved processing mode, filters require 1 less - // multiplication per 2 multiply-add instructions. - - fcnum = 3; - fcdenom = 4; - } - - int s = 0; // Complexity per one scanline. - int s2 = 0; // Complexity per all scanlines. - int i; - - for (i = 0; i < Steps.getItemCount(); i++) { - const CFilterStep& fs = Steps[i]; - - s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity. - - if (fs.IsUpsample) { - if (fs.FltOrig.getCapacity() > 0) { - continue; - } - - s += (fs.Flt.getCapacity() * (fs.InPrefix + fs.InLen + fs.InSuffix) + - fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity()) * - Vars.ElCount; - } else if (fs.ResampleFactor == 0) { - s += fs.FltBank->getFilterLen() * - (fs.FltBank->getOrder() + Vars.ElCount) * fs.OutLen; - - s2 += fs.FltBank->calcInitComplexity(UsedFracMap); - } else { - s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen * fcnum / fcdenom; - } - } - - return (s + s2 / ScanlineCount); - } - - /** - * @brief Thread-isolated data used for scanline processing. - * - * This structure holds data necessary for image's horizontal or vertical - * scanline processing, including scanline processing queue. - * - * @tparam Tin Source element data type. Intermediate buffers store data - * in floating point format. - * @tparam Tout Destination element data type. Intermediate buffers store - * data in floating point format. 
- */ - - template - class CThreadData : public CImageResizerThreadPool ::CWorkload { - public: - virtual void process() { processScanlineQueue(); } - - /** - * This enumeration lists possible scanline operations. - */ - - enum EScanlineOperation { - sopResizeH, ///< Resize horizontal scanline. - ///< - sopResizeV, ///< Resize vertical scanline. - ///< - sopDitherAndUnpackH, ///< Dither and unpack horizontal scanline. - ///< - sopUnpackH ///< Unpack horizontal scanline. - ///< - }; - - /** - * Function initializes *this thread data object and assigns certain - * variables provided by the higher level code. - * - * @param aThreadIndex Index of this thread data (0-based). - * @param aThreadCount Total number of threads used during processing. - * @param aSteps Filtering steps. - * @param aVars Image resizer variables. - */ - - void init(const int aThreadIndex, const int aThreadCount, - const CFilterSteps& aSteps, const CImageResizerVars& aVars) { - ThreadIndex = aThreadIndex; - ThreadCount = aThreadCount; - Steps = &aSteps; - Vars = &aVars; - } - - /** - * Function initializes scanline processing queue, and updates - * capacities of intermediate buffers. - * - * @param aOp Operation to perform over scanline. - * @param TotalLines The total number of scanlines that will be - * processed by all threads. - * @param aSrcLen Source scanline length in pixels. - * @param aSrcIncr Source scanline buffer increment. Ignored in - * horizontal scanline processing. - * @param aResIncr Resulting scanline buffer increment. Ignored in - * horizontal scanline processing. 
- */ - - void initScanlineQueue(const EScanlineOperation aOp, const int TotalLines, - const int aSrcLen, const int aSrcIncr = 0, - const int aResIncr = 0) { - const int l = Vars->BufLen[0] + Vars->BufLen[1]; - - if (Bufs.getCapacity() < l) { - Bufs.alloc(l, fpclass ::fpalign); - } - - BufPtrs[0] = Bufs + Vars->BufOffs[0]; - BufPtrs[1] = Bufs + Vars->BufLen[0] + Vars->BufOffs[1]; - - int j; - int ml = 0; - - for (j = 0; j < Steps->getItemCount(); j++) { - const CFilterStep& fs = (*Steps)[j]; - - if (fs.ResampleFactor == 0 && ml < fs.FltBank->getFilterLen()) { - ml = fs.FltBank->getFilterLen(); - } - } - - TmpFltBuf.alloc(ml, fpclass ::fpalign); - ScanlineOp = aOp; - SrcLen = aSrcLen; - SrcIncr = aSrcIncr; - ResIncr = aResIncr; - QueueLen = 0; - Queue.increaseCapacity((TotalLines + ThreadCount - 1) / ThreadCount, - false); - } - - /** - * Function adds a scanline to the queue buffer. The - * initScanlineQueue() function should be called before calling this - * function. The number of calls to this add function should not - * exceed the TotalLines spread over all threads. - * - * @param SrcBuf Source scanline buffer. - * @param ResBuf Resulting scanline buffer. - */ - - void addScanlineToQueue(void* const SrcBuf, void* const ResBuf) { - Queue[QueueLen].SrcBuf = SrcBuf; - Queue[QueueLen].ResBuf = ResBuf; - QueueLen++; - } - - /** - * Function processes all queued scanlines. 
- */ - - void processScanlineQueue() { - int i; - - switch (ScanlineOp) { - case sopResizeH: { - for (i = 0; i < QueueLen; i++) { - resizeScanlineH((Tin*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf); - } - - break; - } - - case sopResizeV: { - for (i = 0; i < QueueLen; i++) { - resizeScanlineV((fptype*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf); - } - - break; - } - - case sopDitherAndUnpackH: { - if (Vars->UseSRGBGamma) { - for (i = 0; i < QueueLen; i++) { - CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen, - *Vars); - - Ditherer.dither((fptype*)Queue[i].SrcBuf); - - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } else { - for (i = 0; i < QueueLen; i++) { - Ditherer.dither((fptype*)Queue[i].SrcBuf); - - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } - - break; - } - - case sopUnpackH: { - if (Vars->UseSRGBGamma) { - for (i = 0; i < QueueLen; i++) { - CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen, - *Vars); - - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } else { - for (i = 0; i < QueueLen; i++) { - CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf, - (Tout*)Queue[i].ResBuf, SrcLen, - *Vars); - } - } - - break; - } - } - } - - /** - * Function returns ditherer object associated with *this thread data - * object. - */ - - CDitherer& getDitherer() { return (Ditherer); } - - private: - int ThreadIndex; ///< Thread index. - ///< - int ThreadCount; ///< Thread count. - ///< - const CFilterSteps* Steps; ///< Filtering steps. - ///< - const CImageResizerVars* Vars; ///< Image resizer variables. - ///< - CBuffer Bufs; ///< Flip-flop intermediate buffers. - ///< - fptype* BufPtrs[3]; ///< Flip-flop buffer pointers (referenced by - ///< filtering step's InBuf and OutBuf indices). 
- ///< - CBuffer - TmpFltBuf; ///< Temporary buffer used in the - ///< doResize() function, aligned by fpclass :: fpalign. - ///< - EScanlineOperation ScanlineOp; ///< Operation to perform over - ///< scanline. - ///< - int SrcLen; ///< Source scanline length in the last queue. - ///< - int SrcIncr; ///< Source scanline buffer increment in the last queue. - ///< - int ResIncr; ///< Resulting scanline buffer increment in the last - ///< queue. - ///< - CDitherer Ditherer; ///< Ditherer object to use. - ///< - - /** - * @brief Scanline processing queue item. - * - * Scanline processing queue item. - */ - - struct CQueueItem { - void* SrcBuf; ///< Source scanline buffer, will by typecasted to - ///< Tin or fptype*. - ///< - void* ResBuf; ///< Resulting scanline buffer, will by typecasted - ///< to Tout or fptype*. - ///< - }; - - CBuffer Queue; ///< Scanline processing queue. - ///< - int QueueLen; ///< Queue length. - ///< - - /** - * Function resizes a single horizontal scanline. - * - * @param SrcBuf Source scanline buffer. Can be either horizontal or - * vertical. - * @param ResBuf Resulting scanline buffer. - */ - - void resizeScanlineH(const Tin* const SrcBuf, fptype* const ResBuf) { - (*Steps)[0].packScanline(SrcBuf, BufPtrs[0], SrcLen); - BufPtrs[2] = ResBuf; - int j; - - for (j = 0; j < Steps->getItemCount(); j++) { - const CFilterStep& fs = (*Steps)[j]; - fs.prepareInBuf(BufPtrs[fs.InBuf]); - const int DstIncr = (Vars->packmode == 0 ? Vars->ElCount : 1); - - if (fs.ResampleFactor != 0) { - if (fs.IsUpsample) { - fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]); - } else { - fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr); - } - } else { - fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr, - TmpFltBuf); - } - } - } - - /** - * Function resizes a single vertical scanline. - * - * @param SrcBuf Source scanline buffer. Can be either horizontal or - * vertical. - * @param ResBuf Resulting scanline buffer. 
- */ - - void resizeScanlineV(const fptype* const SrcBuf, fptype* const ResBuf) { - (*Steps)[0].convertVtoH(SrcBuf, BufPtrs[0], SrcLen, SrcIncr); - - BufPtrs[2] = ResBuf; - int j; - - for (j = 0; j < Steps->getItemCount(); j++) { - const CFilterStep& fs = (*Steps)[j]; - fs.prepareInBuf(BufPtrs[fs.InBuf]); - const int DstIncr = - (fs.OutBuf == 2 ? ResIncr - : (Vars->packmode == 0 ? Vars->ElCount : 1)); - - if (fs.ResampleFactor != 0) { - if (fs.IsUpsample) { - fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]); - } else { - fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr); - } - } else { - fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr, - TmpFltBuf); - } - } - } - }; -}; - -#undef AVIR_PI -#undef AVIR_PId2 - -} // namespace avir - -#endif // AVIR_CIMAGERESIZER_INCLUDED -/* clang-format off */ -//$ nobt -//$ nocpp -#include "libc/calls/calls.h" - -/** - * @file avir.h - * - * @brief The "main" inclusion file with all required classes and functions. - * - * This is the "main" inclusion file for the "AVIR" image resizer. This - * inclusion file contains implementation of the AVIR image resizing algorithm - * in its entirety. Also includes several classes and functions that can be - * useful elsewhere. - * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - * - * @mainpage - * - * @section intro_sec Introduction - * - * Description is available at https://github.com/avaneev/avir - * - * AVIR is devoted to women. Your digital photos can look good at any size! 
- * - * @section license License - * - * AVIR License Agreement - * - * The MIT License (MIT) - * - * Copyright (c) 2015-2019 Aleksey Vaneev - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Please credit the author of this library in your documentation in the - * following way: "AVIR image resizing algorithm designed by Aleksey Vaneev" - * - * @version 2.4 - */ - -#ifndef AVIR_CIMAGERESIZER_INCLUDED -#define AVIR_CIMAGERESIZER_INCLUDED - -#include -#include -#include -#include - -namespace avir { - -/** - * The macro defines AVIR version string. - */ - -#define AVIR_VERSION "2.4" - -/** - * The macro equals to "pi" constant, fills 53-bit floating point mantissa. - * Undefined at the end of file. - */ - -#define AVIR_PI 3.1415926535897932 - -/** - * The macro equals to "pi divided by 2" constant, fills 53-bit floating - * point mantissa. Undefined at the end of file. 
- */ - -#define AVIR_PId2 1.5707963267948966 - -/** - * Rounding function, based on the (int) typecast. Biased result. Not suitable - * for numbers >= 2^31. - * - * @param d Value to round. - * @return Rounded value. Some bias may be introduced. - */ - -template< class T > -inline T round( const T d ) -{ - return( d < 0.0 ? -(T) (int) ( (T) 0.5 - d ) : (T) (int) ( d + (T) 0.5 )); -} - -/** - * Template function "clamps" (clips) the specified value so that it is not - * lesser than "minv", and not greater than "maxv". - * - * @param Value Value to clamp. - * @param minv Minimal allowed value. - * @param maxv Maximal allowed value. - * @return The clamped value. - */ - -template< class T > -inline T clamp( const T& Value, const T minv, const T maxv ) -{ - if( Value < minv ) - { - return( minv ); - } - else - if( Value > maxv ) - { - return( maxv ); - } - else - { - return( Value ); - } -} - -/** - * Power 2.4 approximation function, designed for sRGB gamma correction. - * - * @param x Argument, in the range 0.09 to 1. - * @return Value raised into power 2.4, approximate. - */ - -template< class T > -inline T pow24_sRGB( const T x ) -{ - const double x2 = x * x; - const double x3 = x2 * x; - const double x4 = x2 * x2; - - return( (T) ( 0.0985766365536824 + 0.839474952656502 * x2 + - 0.363287814061725 * x3 - 0.0125559718896615 / - ( 0.12758338921578 + 0.290283465468235 * x ) - - 0.231757513261358 * x - 0.0395365717969074 * x4 )); -} - -/** - * Power 1/2.4 approximation function, designed for sRGB gamma correction. - * - * @param x Argument, in the range 0.003 to 1. - * @return Value raised into power 1/2.4, approximate. 
- */ - -template< class T > -inline T pow24i_sRGB( const T x ) -{ - const double sx = sqrt( x ); - const double ssx = sqrt( sx ); - const double sssx = sqrt( ssx ); - - return( (T) ( 0.000213364515060263 + 0.0149409239419218 * x + - 0.433973412731747 * sx + ssx * ( 0.659628181609715 * sssx - - 0.0380957908841466 - 0.0706476137208521 * sx ))); -} - -/** - * Function approximately linearizes the sRGB gamma value. - * - * @param s sRGB gamma value, in the range 0 to 1. - * @return Linearized sRGB gamma value, approximated. - */ - -template< class T > -inline T convertSRGB2Lin( const T s ) -{ - const T a = (T) 0.055; - - if( s <= (T) 0.04045 ) - { - return( s / (T) 12.92 ); - } - - return( pow24_sRGB(( s + a ) / ( (T) 1 + a ))); -} - -/** - * Function approximately de-linearizes the linear gamma value. - * - * @param s Linear gamma value, in the range 0 to 1. - * @return sRGB gamma value, approximated. - */ - -template< class T > -inline T convertLin2SRGB( const T s ) -{ - const T a = (T) 0.055; - - if( s <= (T) 0.0031308 ) - { - return( (T) 12.92 * s ); - } - - return(( (T) 1 + a ) * pow24i_sRGB( s ) - a ); -} - -/** - * Function converts (via typecast) specified array of type T1 values of - * length l into array of type T2 values. If T1 is the same as T2, copy - * operation is performed. When copying data at overlapping address spaces, - * "op" should be lower than "ip". - * - * @param ip Input buffer. - * @param[out] op Output buffer. - * @param l The number of elements to copy. - * @param ip Input buffer pointer increment. - * @param op Output buffer pointer increment. - */ - -template< class T1, class T2 > -inline void copyArray( const T1* ip, T2* op, int l, - const int ipinc = 1, const int opinc = 1 ) -{ - while( l > 0 ) - { - *op = (T2) *ip; - op += opinc; - ip += ipinc; - l--; - } -} - -/** - * Function adds values located in array "ip" to array "op". - * - * @param ip Input buffer. - * @param[out] op Output buffer. - * @param l The number of elements to add. 
- * @param ip Input buffer pointer increment. - * @param op Output buffer pointer increment. - */ - -template< class T1, class T2 > -inline void addArray( const T1* ip, T2* op, int l, - const int ipinc = 1, const int opinc = 1 ) -{ - while( l > 0 ) - { - *op += *ip; - op += opinc; - ip += ipinc; - l--; - } -} - -/** - * Function that replicates a set of adjacent elements several times in a row. - * This operation is usually used to replicate pixels at the start or end of - * image's scanline. - * - * @param ip Source array. - * @param ipl Source array length (usually 1..4, but can be any number). - * @param[out] op Destination buffer. - * @param l Number of times the source array should be replicated (the - * destination buffer should be able to hold ipl * l number of elements). - * @param opinc Destination buffer position increment after replicating the - * source array. This value should be equal to at least ipl. - */ - -template< class T1, class T2 > -inline void replicateArray( const T1* const ip, const int ipl, T2* op, int l, - const int opinc ) -{ - if( ipl == 1 ) - { - while( l > 0 ) - { - op[ 0 ] = ip[ 0 ]; - op += opinc; - l--; - } - } - else - if( ipl == 4 ) - { - while( l > 0 ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - op += opinc; - l--; - } - } - else - if( ipl == 3 ) - { - while( l > 0 ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op += opinc; - l--; - } - } - else - if( ipl == 2 ) - { - while( l > 0 ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op += opinc; - l--; - } - } - else - { - while( l > 0 ) - { - int i; - - for( i = 0; i < ipl; i++ ) - { - op[ i ] = ip[ i ]; - } - - op += opinc; - l--; - } - } -} - -/** - * Function calculates frequency response of the specified FIR filter at the - * specified circular frequency. Phase can be calculated as atan2( im, re ). - * Function uses computationally-efficient oscillators instead of "cos" and - * "sin" functions. 
- * - * @param flt FIR filter's coefficients. - * @param fltlen Number of coefficients (taps) in the filter. - * @param th Circular frequency [0; pi]. - * @param[out] re0 Resulting real part of the complex frequency response. - * @param[out] im0 Resulting imaginary part of the complex frequency response. - * @param fltlat Filter's latency in samples (taps). - */ - -template< class T > -inline void calcFIRFilterResponse( const T* flt, int fltlen, - const double th, double& re0, double& im0, const int fltlat = 0 ) -{ - const double sincr = 2.0 * cos( th ); - double cvalue1; - double svalue1; - - if( fltlat == 0 ) - { - cvalue1 = 1.0; - svalue1 = 0.0; - } - else - { - cvalue1 = cos( -fltlat * th ); - svalue1 = sin( -fltlat * th ); - } - - double cvalue2 = cos( -( fltlat + 1 ) * th ); - double svalue2 = sin( -( fltlat + 1 ) * th ); - - double re = 0.0; - double im = 0.0; - - while( fltlen > 0 ) - { - re += cvalue1 * flt[ 0 ]; - im += svalue1 * flt[ 0 ]; - flt++; - fltlen--; - - double tmp = cvalue1; - cvalue1 = sincr * cvalue1 - cvalue2; - cvalue2 = tmp; - - tmp = svalue1; - svalue1 = sincr * svalue1 - svalue2; - svalue2 = tmp; - } - - re0 = re; - im0 = im; -} - -/** - * Function normalizes FIR filter so that its frequency response at DC is - * equal to DCGain. - * - * @param[in,out] p Filter coefficients. - * @param l Filter length. - * @param DCGain Filter's gain at DC. - * @param pstep "p" array step. - */ - -template< class T > -inline void normalizeFIRFilter( T* const p, const int l, const double DCGain, - const int pstep = 1 ) -{ - double s = 0.0; - T* pp = p; - int i = l; - - while( i > 0 ) - { - s += *pp; - pp += pstep; - i--; - } - - s = DCGain / s; - pp = p; - i = l; - - while( i > 0 ) - { - *pp = (T) ( *pp * s ); - pp += pstep; - i--; - } -} - -/** - * @brief Memory buffer class for element array storage, with capacity - * tracking. 
- * - * Allows easier handling of memory blocks allocation and automatic - * deallocation for arrays (buffers) consisting of elements of specified - * class. Tracks buffer's capacity in "int" variable; unsuitable for - * allocation of very large memory blocks (with more than 2 billion elements). - * - * This class manages memory space only - it does not perform element class - * construction (initialization) operations. Buffer's required memory address - * alignment specification is supported. - * - * Uses standard library to allocate and deallocate memory. - * - * @tparam T Buffer element's type. - * @tparam capint Buffer capacity's type to use. Use size_t for large buffers. - */ - -template< class T, typename capint = int > -class CBuffer -{ -public: - CBuffer() - : Data( NULL ) - , DataAligned( NULL ) - , Capacity( 0 ) - , Alignment( 0 ) - { - } - - /** - * Constructor creates the buffer with the specified capacity. - * - * @param aCapacity Buffer's capacity. - * @param aAlignment Buffer's required memory address alignment. 0 - use - * stdlib's default alignment. - */ - - CBuffer( const capint aCapacity, const int aAlignment = 0 ) - { - allocinit( aCapacity, aAlignment ); - } - - CBuffer( const CBuffer& Source ) - { - allocinit( Source.Capacity, Source.Alignment ); - memcpy( DataAligned, Source.DataAligned, Capacity * sizeof( T )); - } - - ~CBuffer() - { - freeData(); - } - - CBuffer& operator = ( const CBuffer& Source ) - { - alloc( Source.Capacity, Source.Alignment ); - memcpy( DataAligned, Source.DataAligned, Capacity * sizeof( T )); - return( *this ); - } - - /** - * Function allocates memory so that the specified number of elements - * can be stored in *this buffer object. - * - * @param aCapacity Storage for this number of elements to allocate. - * @param aAlignment Buffer's required memory address alignment, - * power-of-2 values only. 0 - use stdlib's default alignment. 
- */ - - void alloc( const capint aCapacity, const int aAlignment = 0 ) - { - freeData(); - allocinit( aCapacity, aAlignment ); - } - - /** - * Function deallocates any previously allocated buffer. - */ - - void free() - { - freeData(); - Data = NULL; - DataAligned = NULL; - Capacity = 0; - Alignment = 0; - } - - /** - * @return The capacity of the element buffer. - */ - - capint getCapacity() const - { - return( Capacity ); - } - - /** - * Function "forces" *this buffer to have an arbitary capacity. Calling - * this function invalidates all further operations except deleting *this - * object. This function should not be usually used at all. Function can - * be used to "model" certain buffer capacity without calling a costly - * memory allocation function. - * - * @param NewCapacity A new "forced" capacity. - */ - - void forceCapacity( const capint NewCapacity ) - { - Capacity = NewCapacity; - } - - /** - * Function reallocates *this buffer to a larger size so that it will be - * able to hold the specified number of elements. Downsizing is not - * performed. Alignment is not changed. - * - * @param NewCapacity New (increased) capacity. - * @param DoDataCopy "True" if data in the buffer should be retained. - */ - - void increaseCapacity( const capint NewCapacity, - const bool DoDataCopy = true ) - { - if( NewCapacity < Capacity ) - { - return; - } - - if( DoDataCopy ) - { - const capint PrevCapacity = Capacity; - T* const PrevData = Data; - T* const PrevDataAligned = DataAligned; - - allocinit( NewCapacity, Alignment ); - memcpy( DataAligned, PrevDataAligned, PrevCapacity * sizeof( T )); - - :: free( PrevData ); - } - else - { - :: free( Data ); - allocinit( NewCapacity, Alignment ); - } - } - - /** - * Function "truncates" (reduces) capacity of the buffer without - * reallocating it. Alignment is not changed. - * - * @param NewCapacity New required capacity. 
- */ - - void truncateCapacity( const capint NewCapacity ) - { - if( NewCapacity >= Capacity ) - { - return; - } - - Capacity = NewCapacity; - } - - /** - * Function increases capacity so that the specified number of - * elements can be stored. This function increases the previous capacity - * value by third the current capacity value until space for the required - * number of elements is available. Alignment is not changed. - * - * @param ReqCapacity Required capacity. - */ - - void updateCapacity( const capint ReqCapacity ) - { - if( ReqCapacity <= Capacity ) - { - return; - } - - capint NewCapacity = Capacity; - - while( NewCapacity < ReqCapacity ) - { - NewCapacity += NewCapacity / 3 + 1; - } - - increaseCapacity( NewCapacity ); - } - - operator T* () const - { - return( DataAligned ); - } - -private: - T* Data; ///< Element buffer pointer. - ///< - T* DataAligned; ///< Memory address-aligned element buffer pointer. - ///< - capint Capacity; ///< Element buffer capacity. - ///< - int Alignment; ///< Memory address alignment in use. 0 - use stdlib's - ///< default alignment. - ///< - - /** - * Internal element buffer allocation function used during object - * construction. - * - * @param aCapacity Storage for this number of elements to allocate. - * @param aAlignment Buffer's required memory address alignment. 0 - use - * stdlib's default alignment. - */ - - void allocinit( const capint aCapacity, const int aAlignment ) - { - if( aAlignment == 0 ) - { - Data = (T*) :: malloc( aCapacity * sizeof( T )); - DataAligned = Data; - Alignment = 0; - } - else - { - Data = (T*) :: malloc( aCapacity * sizeof( T ) + aAlignment ); - DataAligned = alignptr( Data, aAlignment ); - Alignment = aAlignment; - } - - Capacity = aCapacity; - } - - /** - * Function frees a previously allocated Data buffer. - */ - - void freeData() - { - :: free( Data ); - } - - /** - * Function modifies the specified pointer so that it becomes memory - * address-aligned. 
- * - * @param ptr Pointer to align. - * @param align Alignment in bytes to apply. - * @return Pointer aligned to align bytes. Works with power-of-2 - * alignments only. If no alignment is necessary, "align" bytes will be - * added to the pointer value. - */ - - template< class Tp > - inline Tp alignptr( const Tp ptr, const uintptr_t align ) - { - return( (Tp) ( (uintptr_t) ptr + align - - ( (uintptr_t) ptr & ( align - 1 ))) ); - } -}; - -/** - * Function optimizes the length of the symmetric-odd FIR filter by removing - * left- and rightmost elements that are below specific threshold. - * - * Synthetic test shows that filter gets optimized in 2..3% of cases and in - * each such case optimization reduces filter length by 6..8%. Optimization, - * however, may skew the results of algorithm modeling and complexity - * calculation leading to a choice of a less optimal algorithm. - * - * @param[in,out] Flt Buffer that contains filter being optimized. - * @param[in,out] FltLatency Variable that holds the current latency of the - * filter. May be adjusted on function return. - * @param Threshold Threshold level. - */ - -template< class T > -inline void optimizeFIRFilter( CBuffer< T >& Flt, int& FltLatency, - T const Threshold = (T) 0.00001 ) -{ - int i; - - // Optimize length. - - for( i = 0; i <= FltLatency; i++ ) - { - if( fabs( Flt[ i ]) >= Threshold || i == FltLatency ) - { - if( i > 0 ) - { - const int NewCapacity = Flt.getCapacity() - i * 2; - copyArray( &Flt[ i ], &Flt[ 0 ], NewCapacity ); - Flt.truncateCapacity( NewCapacity ); - FltLatency -= i; - } - - break; - } - } -} - -/** - * @brief Array of structured objects. - * - * Implements allocation of a linear array of objects of class T (which are - * initialized), addressable via operator[]. Each object is created via the - * "operator new". New object insertions are quick since implementation uses - * prior space allocation (capacity), thus not requiring frequent memory block - * reallocations. 
- * - * @tparam T Array element's type. - */ - -template< class T > -class CStructArray -{ -public: - CStructArray() - : ItemCount( 0 ) - { - } - - CStructArray( const CStructArray& Source ) - : ItemCount( 0 ) - , Items( Source.getItemCount() ) - { - while( ItemCount < Source.getItemCount() ) - { - Items[ ItemCount ] = new T( Source[ ItemCount ]); - ItemCount++; - } - } - - ~CStructArray() - { - clear(); - } - - CStructArray& operator = ( const CStructArray& Source ) - { - clear(); - - const int NewCount = Source.ItemCount; - Items.updateCapacity( NewCount ); - - while( ItemCount < NewCount ) - { - Items[ ItemCount ] = new T( Source[ ItemCount ]); - ItemCount++; - } - - return( *this ); - } - - T& operator []( const int Index ) - { - return( *Items[ Index ]); - } - - const T& operator []( const int Index ) const - { - return( *Items[ Index ]); - } - - /** - * Function creates a new object of type T with the default constructor - * and adds this object to the array. - * - * @return Reference to a newly added object. - */ - - T& add() - { - if( ItemCount == Items.getCapacity() ) - { - Items.increaseCapacity( ItemCount * 3 / 2 + 1 ); - } - - Items[ ItemCount ] = new T(); - ItemCount++; - - return( (*this)[ ItemCount - 1 ]); - } - - /** - * Function changes number of allocated items. New items are created with - * the default constructor. If NewCount is below the current item count, - * items that are above NewCount range will be destructed. - * - * @param NewCount New requested item count. - */ - - void setItemCount( const int NewCount ) - { - if( NewCount > ItemCount ) - { - Items.increaseCapacity( NewCount ); - - while( ItemCount < NewCount ) - { - Items[ ItemCount ] = new T(); - ItemCount++; - } - } - else - { - while( ItemCount > NewCount ) - { - ItemCount--; - delete Items[ ItemCount ]; - } - } - } - - /** - * Function erases all items of *this array. 
- */ - - void clear() - { - while( ItemCount > 0 ) - { - ItemCount--; - delete Items[ ItemCount ]; - } - } - - /** - * @return The number of allocated items. - */ - - int getItemCount() const - { - return( ItemCount ); - } - -private: - int ItemCount; ///< The number of items available in the array. - ///< - CBuffer< T* > Items; ///< Element buffer. - ///< -}; - -/** - * @brief Sine signal generator class. - * - * Class implements sine signal generator without biasing, with - * constructor-based initalization only. This generator uses oscillator - * instead of "sin" function. - */ - -class CSineGen -{ -public: - /** - * Constructor initializes *this sine signal generator. - * - * @param si Sine function increment, in radians. - * @param ph Starting phase, in radians. Add 0.5 * AVIR_PI for cosine - * function. - */ - - CSineGen( const double si, const double ph ) - : svalue1( sin( ph )) - , svalue2( sin( ph - si )) - , sincr( 2.0 * cos( si )) - { - } - - /** - * @return The next value of the sine function, without biasing. - */ - - double generate() - { - const double res = svalue1; - - svalue1 = sincr * res - svalue2; - svalue2 = res; - - return( res ); - } - -private: - double svalue1; ///< Current sine value. - ///< - double svalue2; ///< Previous sine value. - ///< - double sincr; ///< Sine value increment. - ///< -}; - -/** - * @brief Peaked Cosine window function generator class. - * - * Class implements Peaked Cosine window function generator. Generates the - * right-handed half of the window function. The Alpha parameter of this - * window function offers the control of the balance between the early and - * later taps of the filter. E.g. at Alpha=1 both early and later taps are - * attenuated, but at Alpha=4 mostly later taps are attenuated. This offers a - * great control over ringing artifacts produced by a low-pass filter in image - * processing, without compromising achieved image sharpness. 
- */ - -class CDSPWindowGenPeakedCosine -{ -public: - /** - * Constructor initializes *this window function generator. - * - * @param aAlpha Alpha parameter, affects the peak shape (peak - * augmentation) of the window function. Should be >= 1.0. - * @param aLen2 Half filter's length (non-truncated). - */ - - CDSPWindowGenPeakedCosine( const double aAlpha, const double aLen2 ) - : Alpha( aAlpha ) - , Len2( aLen2 ) - , wn( 0 ) - , w1( AVIR_PId2 / Len2, AVIR_PI * 0.5 ) - { - } - - /** - * @return The next Peaked Cosine window function coefficient. - */ - - double generate() - { - const double h = pow( wn / Len2, Alpha ); - wn++; - - return( w1.generate() * ( 1.0 - h )); - } - -private: - double Alpha; ///< Alpha parameter, affects the peak shape of window. - ///< - double Len2; ///< Half length of the window function. - ///< - int wn; ///< Window function integer position. 0 - center of the - ///< window function. - ///< - CSineGen w1; ///< Sine-wave generator. - ///< -}; - -/** - * @brief FIR filter-based equalizer generator. - * - * Class implements an object used to generate symmetric-odd FIR filters with - * the specified frequency response (aka paragraphic equalizer). The - * calculated filter is windowed by the Peaked Cosine window function. - * - * In image processing, due to short length of filters being used (6-8 taps) - * the resulting frequency response of the filter is approximate and may be - * mathematically imperfect, but still adequate to the visual requirements. - * - * On a side note, this equalizer generator can be successfully used for audio - * signal equalization as well: for example, it is used in almost the same - * form in Voxengo Marvel GEQ equalizer plug-in. - * - * Filter generation is based on decomposition of frequency range into - * spectral bands, with each band represented by linear and ramp "kernels". 
- * When the filter is built, these kernels are combined together with - * different weights that approximate the required frequency response. - */ - -class CDSPFIREQ -{ -public: - /** - * Function initializes *this object with the required parameters. The - * gain of frequencies beyond the MinFreq..MaxFreq range are controlled by - * the first and the last band's gain. - * - * @param SampleRate Processing sample rate (use 2 for image processing). - * @param aFilterLength Required filter length in samples (taps). The - * actual filter length is truncated to an integer value. - * @param aBandCount Number of band crossover points required to control, - * including bands at MinFreq and MaxFreq. - * @param MinFreq Minimal frequency that should be controlled. - * @param MaxFreq Maximal frequency that should be controlled. - * @param IsLogBands "True" if the bands should be spaced logarithmically. - * @param WFAlpha Peaked Cosine window function's Alpha parameter. - */ - - void init( const double SampleRate, const double aFilterLength, - const int aBandCount, const double MinFreq, const double MaxFreq, - const bool IsLogBands, const double WFAlpha ) - { - FilterLength = aFilterLength; - BandCount = aBandCount; - - CenterFreqs.alloc( BandCount ); - - z = (int) ceil( FilterLength * 0.5 ); - zi = z + ( z & 1 ); - z2 = z * 2; - - CBuffer< double > oscbuf( z2 ); - initOscBuf( oscbuf ); - - CBuffer< double > winbuf( z ); - initWinBuf( winbuf, WFAlpha ); - - UseFirstVirtBand = ( MinFreq > 0.0 ); - const int k = zi * ( BandCount + ( UseFirstVirtBand ? 1 : 0 )); - Kernels1.alloc( k ); - Kernels2.alloc( k ); - - double m; // Frequency step multiplier. - double mo; // Frequency step offset (addition). 
- - if( IsLogBands ) - { - m = exp( log( MaxFreq / MinFreq ) / ( BandCount - 1 )); - mo = 0.0; - } - else - { - m = 1.0; - mo = ( MaxFreq - MinFreq ) / ( BandCount - 1 ); - } - - double f = MinFreq; - double x1 = 0.0; - double x2; - int si; - - if( UseFirstVirtBand ) - { - si = 0; - } - else - { - si = 1; - CenterFreqs[ 0 ] = 0.0; - f = f * m + mo; - } - - double* kernbuf1 = &Kernels1[ 0 ]; - double* kernbuf2 = &Kernels2[ 0 ]; - int i; - - for( i = si; i < BandCount; i++ ) - { - x2 = f * 2.0 / SampleRate; - CenterFreqs[ i ] = x2; - - fillBandKernel( x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf ); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - f = f * m + mo; - } - - if( x1 < 1.0 ) - { - UseLastVirtBand = true; - fillBandKernel( x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf ); - } - else - { - UseLastVirtBand = false; - } - } - - /** - * @return Filter's length, in samples (taps). - */ - - int getFilterLength() const - { - return( z2 - 1 ); - } - - /** - * @return Filter's latency (group delay), in samples (taps). - */ - - int getFilterLatency() const - { - return( z - 1 ); - } - - /** - * Function creates symmetric-odd FIR filter with the specified gain - * levels at band crossover points. - * - * @param BandGains Array of linear gain levels, count=BandCount specified - * in the init() function. - * @param[out] Filter Output filter buffer, length = getFilterLength(). 
- */ - - void buildFilter( const double* const BandGains, double* const Filter ) - { - const double* kernbuf1 = &Kernels1[ 0 ]; - const double* kernbuf2 = &Kernels2[ 0 ]; - double x1 = 0.0; - double y1 = BandGains[ 0 ]; - double x2; - double y2; - - int i; - int si; - - if( UseFirstVirtBand ) - { - si = 1; - x2 = CenterFreqs[ 0 ]; - y2 = y1; - } - else - { - si = 2; - x2 = CenterFreqs[ 1 ]; - y2 = BandGains[ 1 ]; - } - - copyBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2, - x1 * y2 - x2 * y1 ); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - y1 = y2; - - for( i = si; i < BandCount; i++ ) - { - x2 = CenterFreqs[ i ]; - y2 = BandGains[ i ]; - - addBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2, - x1 * y2 - x2 * y1 ); - - kernbuf1 += zi; - kernbuf2 += zi; - x1 = x2; - y1 = y2; - } - - if( UseLastVirtBand ) - { - addBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2, - x1 * y2 - y1 ); - } - - for( i = 0; i < z - 1; i++ ) - { - Filter[ z + i ] = Filter[ z - 2 - i ]; - } - } - - /** - * Function calculates filter's length (in samples) and latency depending - * on the required non-truncated filter length. - * - * @param aFilterLength Required filter length in samples (non-truncated). - * @param[out] Latency Resulting latency (group delay) of the filter, - * in samples (taps). - * @return Filter length in samples (taps). - */ - - static int calcFilterLength( const double aFilterLength, int& Latency ) - { - const int l = (int) ceil( aFilterLength * 0.5 ); - Latency = l - 1; - - return( l * 2 - 1 ); - } - -private: - double FilterLength; ///< Length of filter. - ///< - int z; ///< Equals (int) ceil( FilterLength * 0.5 ). - ///< - int zi; ///< Equals "z" if z is even, or z + 1 if z is odd. Used as a - ///< Kernels1 and Kernels2 size multiplier and kernel buffer increment - ///< to make sure each kernel buffer is 16-byte aligned. - ///< - int z2; ///< Equals z * 2. - ///< - int BandCount; ///< Number of controllable bands. 
- ///< - CBuffer< double > CenterFreqs; ///< Center frequencies for all bands, - ///< normalized to 0.0-1.0 range. - ///< - CBuffer< double > Kernels1; ///< Half-length kernel buffers for each - ///< spectral band (linear part). - ///< - CBuffer< double > Kernels2; ///< Half-length kernel buffers for each - ///< spectral band (ramp part). - ///< - bool UseFirstVirtBand; ///< "True" if the first virtual band - ///< (between 0.0 and MinFreq) should be used. The first virtual band - ///< won't be used if MinFreq equals 0.0. - ///< - bool UseLastVirtBand; ///< "True" if the last virtual band (between - ///< MaxFreq and SampleRate * 0.5) should be used. The last virtual - ///< band won't be used if MaxFreq * 2.0 equals SampleRate. - ///< - - /** - * Function initializes the "oscbuf" used in the fillBandKernel() - * function. - * - * @param oscbuf Oscillator buffer, length = z * 2. - */ - - void initOscBuf( double* oscbuf ) const - { - int i = z; - - while( i > 0 ) - { - oscbuf[ 0 ] = 0.0; - oscbuf[ 1 ] = 1.0; - oscbuf += 2; - i--; - } - } - - /** - * Function initializes window function buffer. This function generates - * Peaked Cosine window function. - * - * @param winbuf Windowing buffer. - * @param Alpha Peaked Cosine alpha parameter. - */ - - void initWinBuf( double* winbuf, const double Alpha ) const - { - CDSPWindowGenPeakedCosine wf( Alpha, FilterLength * 0.5 ); - int i; - - for( i = 1; i <= z; i++ ) - { - winbuf[ z - i ] = wf.generate(); - } - } - - /** - * Function fills first half of symmetric-odd FIR kernel for the band. - * This function should be called successively for adjacent bands. - * Previous band's x2 should be equal to current band's x1. A band kernel - * consists of 2 elements: linear kernel and ramp kernel. - * - * @param x1 Band's left corner frequency (0..1). - * @param x2 Band's right corner frequency (0..1). - * @param kernbuf1 Band kernel buffer 1 (linear part), length = z. - * @param kernbuf2 Band kernel buffer 2 (ramp part), length = z. 
- * @param oscbuf Oscillation buffer. Before the first call of the - * fillBandKernel() should be initialized with the call of the - * initOscBuf() function. - * @param winbuf Buffer that contains windowing function. - */ - - void fillBandKernel( const double x1, const double x2, double* kernbuf1, - double* kernbuf2, double* oscbuf, const double* const winbuf ) - { - const double s2_incr = AVIR_PI * x2; - const double s2_coeff = 2.0 * cos( s2_incr ); - - double s2_value1 = sin( s2_incr * ( -z + 1 )); - double c2_value1 = sin( s2_incr * ( -z + 1 ) + AVIR_PI * 0.5 ); - oscbuf[ 0 ] = sin( s2_incr * -z ); - oscbuf[ 1 ] = sin( s2_incr * -z + AVIR_PI * 0.5 ); - - int ks; - - for( ks = 1; ks < z; ks++ ) - { - const int ks2 = ks * 2; - const double s1_value1 = oscbuf[ ks2 ]; - const double c1_value1 = oscbuf[ ks2 + 1 ]; - oscbuf[ ks2 ] = s2_value1; - oscbuf[ ks2 + 1 ] = c2_value1; - - const double x = AVIR_PI * ( ks - z ); - const double v0 = winbuf[ ks - 1 ] / (( x1 - x2 ) * x ); - - kernbuf1[ ks - 1 ] = ( x2 * s2_value1 - x1 * s1_value1 + - ( c2_value1 - c1_value1 ) / x ) * v0; - - kernbuf2[ ks - 1 ] = ( s2_value1 - s1_value1 ) * v0; - - s2_value1 = s2_coeff * s2_value1 - oscbuf[ ks2 - 2 ]; - c2_value1 = s2_coeff * c2_value1 - oscbuf[ ks2 - 1 ]; - } - - kernbuf1[ z - 1 ] = ( x2 * x2 - x1 * x1 ) / ( x1 - x2 ) * 0.5; - kernbuf2[ z - 1 ] = -1.0; - } - - /** - * Function copies band kernel's elements to the output buffer. - * - * @param outbuf Output buffer. - * @param kernbuf1 Kernel buffer 1 (linear part). - * @param kernbuf2 Kernel buffer 2 (ramp part). - * @param c Multiplier for linear kernel element. - * @param d Multiplier for ramp kernel element. 
- */ - - void copyBandKernel( double* outbuf, const double* const kernbuf1, - const double* const kernbuf2, const double c, const double d ) const - { - int ks; - - for( ks = 0; ks < z; ks++ ) - { - outbuf[ ks ] = c * kernbuf1[ ks ] + d * kernbuf2[ ks ]; - } - } - - /** - * Function adds band kernel's elements to the output buffer. - * - * @param outbuf Output buffer. - * @param kernbuf1 Kernel buffer 1 (linear part). - * @param kernbuf2 Kernel buffer 2 (ramp part). - * @param c Multiplier for linear kernel element. - * @param d Multiplier for ramp kernel element. - */ - - void addBandKernel( double* outbuf, const double* const kernbuf1, - const double* const kernbuf2, const double c, const double d ) const - { - int ks; - - for( ks = 0; ks < z; ks++ ) - { - outbuf[ ks ] += c * kernbuf1[ ks ] + d * kernbuf2[ ks ]; - } - } -}; - -/** - * @brief Low-pass filter windowed by Peaked Cosine window function. - * - * This class implements calculation of linear-phase symmetric-odd FIR - * low-pass filter windowed by the Peaked Cosine window function, for image - * processing applications. - */ - -class CDSPPeakedCosineLPF -{ -public: - int fl2; ///< Half filter's length, excluding the peak value. This value - ///< can be also used as filter's latency in samples (taps). - ///< - int FilterLen; ///< Filter's length in samples (taps). - ///< - - /** - * Constructor initalizes *this object. - * - * @param aLen2 Half-length (non-truncated) of low-pass filter, in samples - * (taps). - * @param aFreq2 Low-pass filter's corner frequency [0; pi]. - * @param aAlpha Peaked Cosine window function Alpha parameter. - */ - - CDSPPeakedCosineLPF( const double aLen2, const double aFreq2, - const double aAlpha ) - : fl2( (int) ceil( aLen2 ) - 1 ) - , FilterLen( fl2 + fl2 + 1 ) - , Len2( aLen2 ) - , Freq2( aFreq2 ) - , Alpha( aAlpha ) - { - } - - /** - * Function generates a linear-phase low-pass filter windowed by Peaked - * Cosine window function. 
- * - * @param[out] op Output buffer, length = FilterLen (fl2 * 2 + 1). - * @param DCGain Required gain at DC. The resulting filter will be - * normalized to achieve this DC gain. - */ - - template< class T > - void generateLPF( T* op, const double DCGain ) - { - CDSPWindowGenPeakedCosine wf( Alpha, Len2 ); - CSineGen f2( Freq2, 0.0 ); - - op += fl2; - T* op2 = op; - f2.generate(); - int t = 1; - - *op = (T) ( Freq2 * wf.generate() / AVIR_PI ); - double s = *op; - - while( t <= fl2 ) - { - const double v = f2.generate() * wf.generate() / t / AVIR_PI; - op++; - op2--; - *op = (T) v; - *op2 = (T) v; - s += *op + *op2; - t++; - } - - t = FilterLen; - s = DCGain / s; - - while( t > 0 ) - { - *op2 = (T) ( *op2 * s ); - op2++; - t--; - } - } - -private: - double Len2; ///< Half-length (non-truncated) of low-pass filter, in - ///< samples (taps). - ///< - double Freq2; ///< Low-pass filter's corner frequency. - ///< - double Alpha; ///< Peaked Cosine window function Alpha parameter. - ///< -}; - -/** - * @brief Buffer class for parametrized low-pass filter. - * - * This class extends the CBuffer< double > class by adding several variables - * that define a symmetric-odd FIR low-pass filter windowed by Peaked Cosine - * window function. This class can be used to compare filters without - * comparing their buffer contents. - */ - -class CFltBuffer : public CBuffer< double > -{ -public: - double Len2; ///< Half-length (non-truncated) of low-pass filters, in - ///< samples (taps). - ///< - double Freq; ///< Low-pass filter's corner frequency. - ///< - double Alpha; ///< Peaked Cosine window function Alpha parameter. - ///< - double DCGain; ///< DC gain applied to the filter. - ///< - - CFltBuffer() - : CBuffer< double >() - , Len2( 0.0 ) - , Freq( 0.0 ) - , Alpha( 0.0 ) - , DCGain( 0.0 ) - { - } - - /** - * @param b2 Filter buffer to compare *this object to. - * @return Operator returns "true" if both filters have same parameters. 
- */ - - bool operator == ( const CFltBuffer& b2 ) const - { - return( Len2 == b2.Len2 && Freq == b2.Freq && Alpha == b2.Alpha && - DCGain == b2.DCGain ); - } -}; - -/** - * @brief Sinc function-based fractional delay filter bank. - * - * Class implements storage and initialization of a bank of sinc - * function-based fractional delay filters, expressed as 1st order polynomial - * interpolation coefficients. The filters are produced from a single "long" - * windowed low-pass filter. Also supports 0th-order ("nearest neighbor") - * interpolation. - * - * This class also supports multiplication of each fractional delay filter by - * an external filter (usually a low-pass filter). - * - * @tparam fptype Specifies storage type of the filter coefficients bank. The - * filters are initially calculated using the "double" precision. - */ - -template< class fptype > -class CDSPFracFilterBankLin -{ -public: - CDSPFracFilterBankLin() - : Order( -1 ) - { - } - - /** - * Copy constructor copies a limited set of parameters of the source - * filter bank. The actual filters are not copied. Such copying is used - * during filtering steps "modeling" stage. A further init() function - * call is required. - * - * @param s Source filter bank. - */ - - void copyInitParams( const CDSPFracFilterBankLin& s ) - { - WFLen2 = s.WFLen2; - WFFreq = s.WFFreq; - WFAlpha = s.WFAlpha; - FracCount = s.FracCount; - Order = s.Order; - Alignment = s.Alignment; - SrcFilterLen = s.SrcFilterLen; - FilterLen = s.FilterLen; - FilterSize = s.FilterSize; - IsSrcTableBuilt = false; - ExtFilter = s.ExtFilter; - TableFillFlags.alloc( s.TableFillFlags.getCapacity() ); - int i; - - // Copy table fill flags, but shifted so that further initialization - // is still possible (such feature should not be used, though). 
- - for( i = 0; i < TableFillFlags.getCapacity(); i++ ) - { - TableFillFlags[ i ] = (uint8_t) ( s.TableFillFlags[ i ] << 2 ); - } - } - - /** - * Operator compares *this filter bank and another filter bank and returns - * "true" if their parameters are equal. Alignment is not taken into - * account. - * - * @param s Filter bank to compare to. - * @return "True" if compared banks have equal parameters. - */ - - bool operator == ( const CDSPFracFilterBankLin& s ) const - { - return( Order == s.Order && WFLen2 == s.WFLen2 && - WFFreq == s.WFFreq && WFAlpha == s.WFAlpha && - FracCount == s.FracCount && ExtFilter == s.ExtFilter ); - } - - /** - * Function initializes (builds) the filter bank based on the supplied - * parameters. If the supplied parameters are equal to previously defined - * parameters, function does nothing (alignment is assumed to be never - * changing between the init() function calls). - * - * @param ReqFracCount Required number of fractional delays in the filter - * bank. The minimal value is 2. - * @param ReqOrder Required order of the interpolation polynomial - * (0 or 1). - * @param BaseLen Low-pass filter's base length, in samples (taps). - * Affects the actual length of the filter and its overall steepness. - * @param Cutoff Low-pass filter's normalized cutoff frequency [0; 1]. - * @param aWFAlpha Peaked Cosine window function's Alpha parameter. - * @param aExtFilter External filter to apply to each fractional delay - * filter. - * @param aAlignment Memory alignment of the filter bank, power-of-2 - * value. 0 - use default stdlib alignment. - * @param FltLenAlign Filter's length alignment, power-of-2 value. 
- */ - - void init( const int ReqFracCount, const int ReqOrder, - const double BaseLen, const double Cutoff, const double aWFAlpha, - const CFltBuffer& aExtFilter, const int aAlignment = 0, - const int FltLenAlign = 1 ) - { - double NewWFLen2 = 0.5 * BaseLen * ReqFracCount; - double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount; - double NewWFAlpha = aWFAlpha; - - if( ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq && - NewWFAlpha == WFAlpha && ReqFracCount == FracCount && - aExtFilter == ExtFilter ) - { - IsInitRequired = false; - return; - } - - WFLen2 = NewWFLen2; - WFFreq = NewWFFreq; - WFAlpha = NewWFAlpha; - FracCount = ReqFracCount; - Order = ReqOrder; - Alignment = aAlignment; - ExtFilter = aExtFilter; - - CDSPPeakedCosineLPF p( WFLen2, WFFreq, WFAlpha ); - SrcFilterLen = ( p.fl2 / ReqFracCount + 1 ) * 2; - - const int ElementSize = ReqOrder + 1; - FilterLen = SrcFilterLen; - - if( ExtFilter.getCapacity() > 0 ) - { - FilterLen += ExtFilter.getCapacity() - 1; - } - - FilterLen = ( FilterLen + FltLenAlign - 1 ) & ~( FltLenAlign - 1 ); - FilterSize = FilterLen * ElementSize; - IsSrcTableBuilt = false; - IsInitRequired = true; - } - - /** - * @return The length of each fractional delay filter, in samples (taps). - * Always an even value. - */ - - int getFilterLen() const - { - return( FilterLen ); - } - - /** - * @return The number of fractional filters in use by *this bank. - */ - - int getFracCount() const - { - return( FracCount ); - } - - /** - * @return The order of the interpolation polynomial. - */ - - int getOrder() const - { - return( Order ); - } - - /** - * Function returns the pointer to the specified interpolation table - * filter. - * - * @param i Filter (fractional delay) index, in the range 0 to - * ReqFracCount - 1, inclusive. - * @return Pointer to filter. Higher order polynomial coefficients are - * stored after after previous order coefficients, separated by FilterLen - * elements. 
- */ - - const fptype* getFilter( const int i ) - { - if( !IsSrcTableBuilt ) - { - buildSrcTable(); - } - - fptype* const Res = &Table[ i * FilterSize ]; - - if(( TableFillFlags[ i ] & 2 ) == 0 ) - { - createFilter( i ); - TableFillFlags[ i ] |= 2; - - if( Order > 0 ) - { - createFilter( i + 1 ); - const fptype* const Res2 = Res + FilterSize; - fptype* const op = Res + FilterLen; - int j; - - // Create higher-order interpolation coefficients (linear - // interpolation). - - for( j = 0; j < FilterLen; j++ ) - { - op[ j ] = Res2[ j ] - Res[ j ]; - } - } - } - - return( Res ); - } - - /** - * Function makes sure all fractional delay filters were created. - */ - - void createAllFilters() - { - int i; - - for( i = 0; i < FracCount; i++ ) - { - getFilter( i ); - } - } - - /** - * Function returns an approximate initialization complexity, expressed in - * the number of multiply-add operations. This includes fractional delay - * filters calculation and multiplication by an external filter. This - * function can only be called after the init() function. - * - * @param FracUseMap Fractional delays use map, each element corresponds - * to a single fractional delay, will be compared to the internal table - * fill flags. This map should include 0 and 1 values only. - * @return The complexity of the initialization, expressed in the number - * of multiply-add operations. - */ - - int calcInitComplexity( const CBuffer< uint8_t >& FracUseMap ) const - { - const int FltInitCost = 65; // Cost to initialize a single sample - // of the fractional delay filter. - const int FltUseCost = FilterLen * Order + - SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single - // fractional delay filter. 
- const int ucb[ 2 ] = { 0, FltUseCost }; - int ic; - int i; - - if( IsInitRequired ) - { - ic = FracCount * SrcFilterLen * FltInitCost; - - for( i = 0; i < FracCount; i++ ) - { - ic += ucb[ FracUseMap[ i ]]; - } - } - else - { - ic = 0; - - for( i = 0; i < FracCount; i++ ) - { - if( FracUseMap[ i ] != 0 ) - { - ic += ucb[ TableFillFlags[ i ] == 0 ? 1 : 0 ]; - } - } - } - - return( ic ); - } - -private: - static const int InterpPoints = 2; ///< The maximal number of points the - ///< interpolation is based on. - ///< - double WFLen2; ///< Window function's Len2 parameter. - ///< - double WFFreq; ///< Window function's Freq parameter. - ///< - double WFAlpha; ///< Window function's Alpha parameter. - ///< - int FracCount; ///< The required number of fractional delay filters. - ///< - int Order; ///< The order of the interpolation polynomial. - ///< - int Alignment; ///< The required filter table alignment. - ///< - int SrcFilterLen; ///< Length of the "source" filters. This is always an - ///< even value. - ///< - int FilterLen; ///< Specifies the number of samples (taps) each fractional - ///< delay filter has. This is always an even value, adjusted by the - ///< FltLenAlign. - ///< - int FilterSize; ///< The size of a single filter element, equals - ///< FilterLen * ElementSize. - ///< - bool IsInitRequired; ///< "True" if SrcTable filter table initialization - ///< is required. This value is available only after the call to the - ///< init() function. - ///< - CBuffer< fptype > Table; ///< Interpolation table, size equals to - ///< ReqFracCount * FilterLen * ElementSize. - ///< - CBuffer< uint8_t > TableFillFlags; ///< Contains ReqFracCount + 1 - ///< elements. Bit 0 of every element is 1 if Table already contains - ///< the filter from SrcTable filtered by ExtFilter. Bit 1 of every - ///< element means higher order coefficients were filled for the - ///< filter. 
- ///< - CFltBuffer ExtFilter; ///< External filter that should be applied to every - ///< fractional delay filter. Can be empty. Half of this filter's - ///< capacity is used as latency (group delay) value of the filter. - ///< - CBuffer< double > SrcTable; ///< Source table of delay filters, contains - ///< ReqFracCount + 1 elements. This table is used to fill the Table - ///< with the actual filters, filtered by an external filter. - ///< - bool IsSrcTableBuilt; ///< "True" if the SrcTable was built already. This - ///< variable is set to "false" in the init() function. - ///< - - /** - * Function builds source table used in the createFilter() function. - */ - - void buildSrcTable() - { - IsSrcTableBuilt = true; - IsInitRequired = false; - - CDSPPeakedCosineLPF p( WFLen2, WFFreq, WFAlpha ); - - const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1; - const int BufOffs = InterpPoints / 2 - 1; - const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs; - - CBuffer< double > Buf( BufLen ); - memset( Buf, 0, ( BufCenter - p.fl2 ) * sizeof( double )); - int i = BufLen - BufCenter - p.fl2 - 1; - memset( &Buf[ BufLen - i ], 0, i * sizeof( double )); - - p.generateLPF( &Buf[ BufCenter - p.fl2 ], FracCount ); asm("int3"); - - SrcTable.alloc(( FracCount + 1 ) * SrcFilterLen ); - TableFillFlags.alloc( FracCount + 1 ); - int j; - double* op0 = SrcTable; - - for( i = FracCount; i >= 0; i-- ) - { - TableFillFlags[ i ] = 0; - double* p = Buf + BufOffs + i; - - for( j = 0; j < SrcFilterLen; j++ ) - { - op0[ 0 ] = p[ 0 ]; - op0++; - p += FracCount; - } - } - - Table.alloc(( FracCount + 1 ) * FilterSize, Alignment ); - } - - /** - * Function creates the specified filter in the Table by copying it from - * the SrcTable and filtering by ExtFilter. Function does nothing if - * filter was already created. - * - * @param k Filter index to create, in the range 0 to FracCount, - * inclusive. 
- */ - - void createFilter( const int k ) - { - if( TableFillFlags[ k ] != 0 ) - { - return; - } - - TableFillFlags[ k ] |= 1; - const int ExtFilterLatency = ExtFilter.getCapacity() / 2; - const int ResLatency = ExtFilterLatency + SrcFilterLen / 2; - int ResLen = SrcFilterLen; - - if( ExtFilter.getCapacity() > 0 ) - { - ResLen += ExtFilter.getCapacity() - 1; - } - - const int ResOffs = FilterLen / 2 - ResLatency; - fptype* op = &Table[ k * FilterSize ]; - int i; - - for( i = 0; i < ResOffs; i++ ) - { - op[ i ] = 0.0; - } - - for( i = ResOffs + ResLen; i < FilterLen; i++ ) - { - op[ i ] = 0.0; - } - - op += ResOffs; - const double* const srcflt = &SrcTable[ k * SrcFilterLen ]; - - if( ExtFilter.getCapacity() == 0 ) - { - for( i = 0; i < ResLen; i++ ) - { - op[ i ] = (fptype) srcflt[ i ]; - } - - return; - } - - // Perform convolution of extflt and srcflt. - - const double* const extflt = &ExtFilter[ 0 ]; - int j; - - for( j = 0; j < ResLen; j++ ) - { - int k = 0; - int l = j - ExtFilter.getCapacity() + 1; - int r = l + ExtFilter.getCapacity(); - - if( l < 0 ) - { - k -= l; - l = 0; - } - - if( r > SrcFilterLen ) - { - r = SrcFilterLen; - } - - const double* const extfltb = extflt + k; - const double* const srcfltb = srcflt + l; - double s = 0.0; - l = r - l; - - for( i = 0; i < l; i++ ) - { - s += extfltb[ i ] * srcfltb[ i ]; - } - - op[ j ] = (fptype) s; - } - } -}; - -/** - * @brief Thread pool for multi-threaded image resizing operation. - * - * This base class is used to organize a multi-threaded image resizing - * operation. The thread pool should consist of threads that initially wait - * for a signal. Upon receiving a signal (via the startAllWorkloads() - * function) each previously added thread should execute its workload's - * process() function once, and return to the wait signal state again. The - * thread pool should be also able to efficiently wait for all workloads to - * finish via the waitAllWorkloadsToFinish() function. 
- * - * The image resizing algorithm makes calls to functions of this class. - */ - -class CImageResizerThreadPool -{ -public: - CImageResizerThreadPool() - { - } - - virtual ~CImageResizerThreadPool() - { - } - - /** - * @brief Thread pool's workload object class. - * - * This class should be used as a base class for objects that perform the - * actual work spread over several threads. - */ - - class CWorkload - { - public: - virtual ~CWorkload() - { - } - - /** - * Function that gets called from the thread when thread pool's - * startAllWorkloads() function is called. - */ - - virtual void process() = 0; - }; - - /** - * @return The suggested number of workloads (and their associated - * threads) to add. The minimal value this function can return is 1. The - * usual value may depend on the number of physical and virtual cores - * present in the system, and on other considerations. - */ - - virtual int getSuggestedWorkloadCount() const - { - return( 1 ); - } - - /** - * Function adds a new workload (and possibly thread) to the thread pool. - * The caller decides how many parallel workloads (and threads) it - * requires, but this number will not exceed the value returned by the - * getSuggestedWorkloadCount() function. It is implementation-specific how - * many workloads to associate with a single thread. But for efficiency - * reasons each workload should be associated with its own thread. - * - * Note that the same set of workload objects will be processed each time - * the startAllWorkloads() function is called. This means that workload - * objects are added only once. The caller changes the state of the - * workload objects and then calls the startAllWorkloads() function to - * process them. - * - * @param Workload Workload object whose process() function will be called - * from within the thread when the startAllWorkloads() function is called. 
- */ - - virtual void addWorkload( CWorkload* const Workload ) - { - } - - /** - * Function starts all workloads associated with threads previously added - * via the addWorkload() function. It is assumed that this function - * performs the necessary "memory barrier" (or "cache sync") kind of - * operation so that all threads catch up the prior changes made to the - * workload objects during their wait state. - */ - - virtual void startAllWorkloads() - { - } - - /** - * Function waits for all workloads to finish. - */ - - virtual void waitAllWorkloadsToFinish() - { - } - - /** - * Function removes all workloads previously added via the addWorkload() - * function. This function gets called only after the - * waitAllWorkloadsToFinish() function call. - */ - - virtual void removeAllWorkloads() - { - } -}; - -/** - * @brief Resizing algorithm parameters structure. - * - * This structure holds all selectable parameters used by the resizing - * algorithm at various stages, for both downsizing and upsizing. There are no - * other parameters exist that can optimize the performance of the resizing - * algorithm. Filter length parameters can take fractional values. - * - * Beside quality, these parameters (except Alpha parameters) directly affect - * the computative cost of the resizing algorithm. It is possible to trade - * the visual quality for computative cost. - * - * Anti-alias filtering during downsizing can be defined as a considerable - * reduction of contrast of smallest features of an image. Unfortunately, such - * de-contrasting partially affects features of all sizes thus producing a - * non-linearity of frequency response. All pre-defined parameter sets are - * described by 3 values separated by slashes. The first value is the - * de-contrasting factor of small features (which are being removed) while - * the second value is the de-contrasting factor of large features (which - * should remain intact), with value of 1 equating to "no contrast change". 
- * The third value is the optimization score (see below), with value of 0 - * equating to the "perfect" linearity of frequency response. - * - * The pre-defined parameter sets offered by this library were auto-optimized - * for the given LPFltBaseLen, IntFltLen and CorrFltAlpha values. The - * optimization goal was to minimize the score: the sum of squares of the - * difference between original and processed images (which was not actually - * resized, k=1). The original image was a 0.5 megapixel uniformly-distributed - * white-noise image with pixel intensities in the 0-1 range. Such goal - * converges very well and produces filtering system with the flattest - * frequency response possible for the given constraints. With this goal, - * increasing the LPFltBaseLen value reduces the general amount of aliasing - * artifacts. - */ - -struct CImageResizerParams -{ - double CorrFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the correction filter. The "usable" values are - ///< in the narrow range 1.0 to 1.5. - ///< - double CorrFltLen; ///< Correction filter's length in samples (taps). The - ///< "usable" range is narrow, 5.5 to 8, as to minimize the - ///< "overcorrection" which is mathematically precise, but visually - ///< unacceptable. - ///< - double IntFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the interpolation low-pass filter. The "usable" - ///< values are in the range 1.5 to 2.5. - ///< - double IntFltCutoff; ///< Interpolation low-pass filter's cutoff frequency - ///< (normalized, [0; 1]). The "usable" range is 0.6 to 0.8. - ///< - double IntFltLen; ///< Interpolation low-pass filter's length in samples - ///< (taps). The length value should be at least 18 or otherwise a - ///< "dark grid" artifact will be introduced if a further sharpening - ///< is applied. 
IntFltLen together with other IntFlt parameters - ///< should be tuned in a way that produces the flattest frequency - ///< response in 0-0.5 normalized frequency range (this range is due - ///< to 2X upsampling). - ///< - double LPFltAlpha; ///< Alpha parameter of the Peaked Cosine window - ///< function used on the low-pass filter. The "usable" values are - ///< in the range 1.5 to 6.5. - ///< - double LPFltBaseLen; ///< Base length of the low-pass (aka anti-aliasing - ///< or reconstruction) filter, in samples (taps), further adjusted by - ///< the actual cutoff frequency, upsampling and downsampling factors. - ///< The "usable" range is between 6 and 9. - ///< - double LPFltCutoffMult; ///< Low-pass filter's cutoff frequency - ///< multiplier. This value can be both below and above 1.0 as - ///< low-pass filters are inserted on downsampling and upsampling - ///< steps and always have corner frequency equal to or below 0.5pi. - ///< This multiplier shifts low-pass filter's corner frequency towards - ///< lower (if below 1.0) or higher (if above 1.0) frequencies. This - ///< multiplier can be way below 1.0 since any additional - ///< high-frequency damping will be partially corrected by the - ///< correction filter. The "usable" range is 0.3 to 1.0. - ///< - - CImageResizerParams() - : HBFltAlpha( 1.75395 ) - , HBFltCutoff( 0.40356 ) - , HBFltLen( 22.00000 ) - { - } - - double HBFltAlpha; ///< Half-band filter's Alpha. Assigned internally. - ///< - double HBFltCutoff; ///< Half-band filter's cutoff point [0; 1]. Assigned - ///< internally. - ///< - double HBFltLen; ///< Length of the half-band low-pass filter. Assigned - ///< internally. Internally used to perform 2X or higher downsampling. - ///< These filter parameters should be treated as "technical" and do - ///< not require adjustment as they were tuned to suit all - ///< combinations of other parameters. 
This half-band filter provides - ///< a wide transition band (for minimal ringing artifacts) and a high - ///< stop-band attenuation (for minimal aliasing). - ///< -}; - -/** - * @brief The default set of resizing algorithm parameters - * (10.01/1.029/0.019169). - * - * This is the default set of resizing parameters that was designed to deliver - * a sharp image while still providing a low amount of ringing artifacts, and - * having a reasonable computational cost. - */ - -struct CImageResizerParamsDef : public CImageResizerParams -{ - CImageResizerParamsDef() - { - CorrFltAlpha = 1.0;//10.01/1.88/1.029(522.43)/0.019169:258648,446808 - CorrFltLen = 6.30770; - IntFltAlpha = 2.27825; - IntFltCutoff = 0.75493; - IntFltLen = 18.0; - LPFltAlpha = 3.40127; - LPFltBaseLen = 7.78; - LPFltCutoffMult = 0.78797; - } -}; - -/** - * @brief Set of resizing algorithm parameters for ultra-low-ringing - * performance (7.69/1.069/0.000245). - * - * This set of resizing algorithm parameters offers the lowest amount of - * ringing this library is capable of providing while still offering a decent - * quality. Low ringing is attained at the expense of higher aliasing - * artifacts and a slightly reduced contrast. - */ - -struct CImageResizerParamsULR : public CImageResizerParams -{ - CImageResizerParamsULR() - { - CorrFltAlpha = 1.0;//7.69/1.97/1.069(31445.45)/0.000245:258627,436845 - CorrFltLen = 5.83280; - IntFltAlpha = 2.11453; - IntFltCutoff = 0.73986; - IntFltLen = 18.0; - LPFltAlpha = 1.73455; - LPFltBaseLen = 6.40; - LPFltCutoffMult = 0.61314; - } -}; - -/** - * @brief Set of resizing algorithm parameters for low-ringing performance - * (7.86/1.065/0.000106). - * - * This set of resizing algorithm parameters offers a very low-ringing - * performance at the expense of higher aliasing artifacts and a slightly - * reduced contrast. 
- */ - -struct CImageResizerParamsLR : public CImageResizerParams -{ - CImageResizerParamsLR() - { - CorrFltAlpha = 1.0;//7.86/1.96/1.065(73865.02)/0.000106:258636,437381 - CorrFltLen = 5.87671; - IntFltAlpha = 2.25322; - IntFltCutoff = 0.74090; - IntFltLen = 18.0; - LPFltAlpha = 1.79306; - LPFltBaseLen = 7.00; - LPFltCutoffMult = 0.68881; - } -}; - -/** - * @brief Set of resizing algorithm parameters for lower-ringing performance - * (8.86/1.046/0.010168). - * - * This set of resizing algorithm parameters offers a lower-ringing - * performance in comparison to the default setting, at the expense of higher - * aliasing artifacts and a slightly reduced contrast. - */ - -struct CImageResizerParamsLow : public CImageResizerParams -{ - CImageResizerParamsLow() - { - CorrFltAlpha = 1.0;//8.86/1.92/1.046(871.54)/0.010168:258647,442252 - CorrFltLen = 6.09757; - IntFltAlpha = 2.36704; - IntFltCutoff = 0.74674; - IntFltLen = 18.0; - LPFltAlpha = 2.19427; - LPFltBaseLen = 7.66; - LPFltCutoffMult = 0.75380; - } -}; - -/** - * @brief Set of resizing algorithm parameters for low-aliasing - * resizing (11.81/1.012/0.038379). - * - * This set of resizing algorithm parameters offers a considerable - * anti-aliasing performance with a good frequency response linearity (and - * contrast). This is an intermediate setting between the default and Ultra - * parameters. - */ - -struct CImageResizerParamsHigh : public CImageResizerParams -{ - CImageResizerParamsHigh() - { - CorrFltAlpha = 1.0;//11.81/1.83/1.012(307.84)/0.038379:258660,452719 - CorrFltLen = 6.80909; - IntFltAlpha = 2.44917; - IntFltCutoff = 0.75856; - IntFltLen = 18.0; - LPFltAlpha = 4.39527; - LPFltBaseLen = 8.18; - LPFltCutoffMult = 0.79172; - } -}; - -/** - * @brief Set of resizing algorithm parameters for ultra low-aliasing - * resizing (13.65/1.001/0.000483). 
- * - * This set of resizing algorithm parameters offers a very considerable - * anti-aliasing performance with a good frequency response linearity (and - * contrast). This set of parameters is computationally expensive and may - * produce ringing artifacts on sharp features. - */ - -struct CImageResizerParamsUltra : public CImageResizerParams -{ - CImageResizerParamsUltra() - { - CorrFltAlpha = 1.0;//13.65/1.79/1.001(28288.41)/0.000483:258658,457974 - CorrFltLen = 7.48060; - IntFltAlpha = 1.93750; - IntFltCutoff = 0.75462; - IntFltLen = 18.0; - LPFltAlpha = 5.55209; - LPFltBaseLen = 8.34; - LPFltCutoffMult = 0.78002; - } -}; - -/** - * @brief Image resizing variables class. - * - * This is an utility "catch all" class that defines various variables used - * during image resizing. Several variables that are explicitly initialized in - * this class' constructor are also used as additional "input" variables to - * the image resizing function. These variables will not be changed by the - * avir::CImageResizer<>::resizeImage() function. - */ - -class CImageResizerVars -{ -public: - int ElCount; ///< The number of "fptype" elements used to store 1 pixel. - ///< - int ElCountIO; ///< The number of source and destination image's elements - ///< used to store 1 pixel. - ///< - int fppack; ///< The number of atomic types stored in a single "fptype" - ///< element. - ///< - int fpalign; ///< Suggested alignment size in bytes. This is not a - ///< required alignment, because image resizing algorithm cannot be - ///< made to have a strictly aligned data access in all cases (e.g. - ///< de-interleaved interpolation cannot perform aligned accesses). - ///< - int elalign; ///< Length alignment of arrays of elements. This applies to - ///< filters and intermediate buffers: this constant forces filters - ///< and scanlines to have a length which is a multiple of this value, - ///< for more efficient SIMD implementation. 
- ///< - int packmode; ///< 0 if interleaved packing, 1 if de-interleaved. - ///< - int BufLen[ 2 ]; ///< Intermediate buffers' lengths in "fptype" elements. - int BufOffs[ 2 ]; ///< Offsets into the intermediate buffers, used to - ///< provide prefix elements required during processing so that no - ///< "out of range" access happens. This offset is a multiple of - ///< ElCount if pixels are stored in interleaved form. - ///< - double k; ///< Resizing step coefficient, updated to reflect the actually - ///< used coefficient during resizing. - ///< - double o; ///< Starting pixel offset inside the source image, updated to - ///< reflect the actually used offset during resizing. - ///< - int ResizeStep; ///< Index of the resizing step in the latest filtering - ///< steps array. - ///< - double InGammaMult; ///< Input gamma multiplier, used to convert input - ///< data to 0 to 1 range. 0.0 if no gamma is in use. - ///< - double OutGammaMult; ///< Output gamma multiplier, used to convert data to - ///< 0 to 255/65535 range. 0.0 if no gamma is in use. - ///< - - double ox; ///< Start X pixel offset within source image (can be - ///< negative). Positive offset moves image to the left. - ///< - double oy; ///< Start Y pixel offset within source image (can be - ///< negative). Positive offset moves image to the top. - ///< - CImageResizerThreadPool* ThreadPool; ///< Thread pool to be used by the - ///< image resizing function. Set to NULL to use single-threaded - ///< processing. - ///< - bool UseSRGBGamma; ///< Perform sRGB gamma linearization (correction). - ///< - int BuildMode; ///< The build mode to use, for debugging purposes. Set to - ///< -1 to select a minimal-complexity mode automatically. All build - ///< modes deliver similar results with minor deviations. - ///< - int RndSeed; ///< Random seed parameter. This parameter may be incremented - ///< after each random generator initialization. The use of this - ///< variable depends on the ditherer implementation. 
- ///< - - CImageResizerVars() - : ox( 0.0 ) - , oy( 0.0 ) - , ThreadPool( NULL ) - , UseSRGBGamma( false ) - , BuildMode( -1 ) - , RndSeed( 0 ) - { - } -}; - -/** - * @brief Image resizer's filtering step class. - * - * Class defines data to perform a single filtering step over a whole - * horizontal or vertical scanline. Resizing consists of 1 or more steps that - * may be performed before the actual resizing takes place. Filtering may also - * follow a resizing step. Each step must ensure that scanline data contains - * enough pixels to perform the next step (which may be resizing) without - * exceeding scanline's bounds. - * - * A derived class must implement several "const" and "static" functions that - * are used to perform the actual filtering in interleaved or de-interleaved - * mode. - * - * @tparam fptype Floating point type to use for storing pixel elements. SIMD - * types can be used: in this case each element may hold a whole pixel. - * @tparam fptypeatom The atomic type the "fptype" consists of. - */ - -template< class fptype, class fptypeatom > -class CImageResizerFilterStep -{ -public: - bool IsUpsample; ///< "True" if this step is an upsampling step, "false" - ///< if downsampling step. Should be set to "false" if ResampleFactor - ///< equals 0. - ///< - int ResampleFactor; ///< Resample factor (>=1). If 0, this is a resizing - ///< step. This value should be >1 if IsUpsample equals "true". - ///< - CBuffer< fptype > Flt; ///< Filter to use at this step. - ///< - CFltBuffer FltOrig; ///< Originally-designed filter. This buffer may not - ///< be assigned. Assigned by filters that precede the resizing step - ///< if such filter is planned to be embedded into the interpolation - ///< filter as "external" filter. If IsUpsample=true and this filter - ///< buffer is not empty, the upsampling step will not itself apply - ///< any filtering over upsampled input scanline. - ///< - double DCGain; ///< DC gain which was applied to the filter. 
Not defined - ///< if ResampleFactor = 0. - ///< - int FltLatency; ///< Filter's latency (group delay, shift) in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int InLen; ///< Input scanline's length in pixels. - ///< - int InBuf; ///< Input buffer index, 0 or 1. - ///< - int InPrefix; ///< Required input prefix pixels. These prefix pixels will - ///< be filled with source scanline's first pixel value. If IsUpsample - ///< is "true", this is the additional number of times the first pixel - ///< will be filtered before processing scanline, this number is also - ///< reflected in the OutPrefix. - ///< - int InSuffix; ///< Required input suffix pixels. These suffix pixels will - ///< be filled with source scanline's last pixel value. If IsUpsample - ///< is "true", this is the additional number of times the last pixel - ///< will be filtered before processing scanline, this number is also - ///< reflected in the OutSuffix. - ///< - int InElIncr; ///< Pixel element increment within the input buffer, used - ///< during de-interleaved processing: in this case each image's - ///< channel is stored independently, InElIncr elements apart. - ///< - int OutLen; ///< Length of the resulting scanline. - ///< - int OutBuf; ///< Output buffer index. 0 or 1; 2 for the last step. - ///< - int OutPrefix; ///< Required output prefix pixels. These prefix pixels - ///< will not be pre-filled with any values. Value is valid only if - ///< IsUpsample equals "true". - ///< - int OutSuffix; ///< Required input suffix pixels. These suffix pixels will - ///< not be pre-filled with any values. Value is valid only if - ///< IsUpsample equals "true". - ///< - int OutElIncr; ///< Pixel element increment within the output buffer, used - ///< during de-interleaved processing. Equals to the InBufElIncr of - ///< the next step. 
- ///< - CBuffer< fptype > PrefixDC; ///< DC component fluctuations added at the - ///< start of the resulting scanline, used when IsUpsample equals - ///< "true". - ///< - CBuffer< fptype > SuffixDC; ///< DC component fluctuations added at the - ///< end of the resulting scanline, used when IsUpsample equals - ///< "true". - ///< - int EdgePixelCount; ///< The number of edge pixels added. Affects the - ///< initial position within the input scanline, used to produce edge - ///< pixels. This variable is used and should be defined when - ///< IsUpsample=false and ResampleFactor>0. When assigning this - ///< variable it is also necessary to update InPrefix, OutLen and - ///< Vars.o variables. - ///< - static const int EdgePixelCountDef = 3; ///< The default number of pixels - ///< additionally produced at scanline edges during filtering. This is - ///< required to reduce edge artifacts. - ///< - - /** - * @brief Resizing position structure. - * - * Structure holds resizing position and pointer to fractional delay - * filter. - */ - - struct CResizePos - { - int SrcPosInt; ///< Source scanline position. - ///< - int fti; ///< Fractional delay filter index. - ///< - const fptype* ftp; ///< Fractional delay filter pointer. - ///< - fptypeatom x; ///< Interpolation coefficient between delay filters. - ///< - int SrcOffs; ///< Source scanline offset. - ///< - }; - - /** - * @brief Resizing positions buffer class. - * - * This class combines buffer together with variables that define resizing - * stepping. - */ - - class CRPosBuf : public CBuffer< CResizePos > - { - public: - double k; ///< Resizing step. - ///< - double o; ///< Resizing offset. - ///< - int FracCount; ///< The number of fractional delay filters in a filter - ///< bank used together with this buffer. - ///< - }; - - /** - * @brief Resizing positions buffer array class. 
- * - * This class combines structure array of the CRPosBuf class objects with - * the function that locates or creates buffer with the required resizing - * stepping. - */ - - class CRPosBufArray : public CStructArray< CRPosBuf > - { - public: - using CStructArray< CRPosBuf > :: add; - using CStructArray< CRPosBuf > :: getItemCount; - - /** - * Function returns the resizing positions buffer with the required - * stepping. If no such buffer exists, it is created. - * - * @param k Resizing step. - * @param o Resizing offset. - * @param FracCount The number of fractional delay filters in a filter - * bank used together with this buffer. - * @return Reference to the CRPosBuf object. - */ - - CRPosBuf& getRPosBuf( const double k, const double o, - const int FracCount ) - { - int i; - - for( i = 0; i < getItemCount(); i++ ) - { - CRPosBuf& Buf = (*this)[ i ]; - - if( Buf.k == k && Buf.o == o && Buf.FracCount == FracCount ) - { - return( Buf ); - } - } - - CRPosBuf& NewBuf = add(); - NewBuf.k = k; - NewBuf.o = o; - NewBuf.FracCount = FracCount; - - return( NewBuf ); - } - }; - - CRPosBuf* RPosBuf; ///< Resizing positions buffer. Used when - ///< ResampleFactor equals 0 (resizing step). - ///< - CDSPFracFilterBankLin< fptype >* FltBank; ///< Filter bank in use by *this - ///< resizing step. - ///< -}; - -/** - * @brief Interleaved filtering steps implementation class. - * - * This class implements scanline filtering functions in interleaved mode. - * This means that each pixel is processed independently, not in groups. - * - * @tparam fptype Floating point type to use for storing pixel elements. SIMD - * types can be used: in this case each element may hold a whole pixel. - * @tparam fptypeatom The atomic type the "fptype" consists of. 
- */ - -template< class fptype, class fptypeatom > -class CImageResizerFilterStepINL : - public CImageResizerFilterStep< fptype, fptypeatom > -{ -public: - using CImageResizerFilterStep< fptype, fptypeatom > :: IsUpsample; - using CImageResizerFilterStep< fptype, fptypeatom > :: ResampleFactor; - using CImageResizerFilterStep< fptype, fptypeatom > :: Flt; - using CImageResizerFilterStep< fptype, fptypeatom > :: FltOrig; - using CImageResizerFilterStep< fptype, fptypeatom > :: FltLatency; - using CImageResizerFilterStep< fptype, fptypeatom > :: Vars; - using CImageResizerFilterStep< fptype, fptypeatom > :: InLen; - using CImageResizerFilterStep< fptype, fptypeatom > :: InPrefix; - using CImageResizerFilterStep< fptype, fptypeatom > :: InSuffix; - using CImageResizerFilterStep< fptype, fptypeatom > :: OutLen; - using CImageResizerFilterStep< fptype, fptypeatom > :: OutPrefix; - using CImageResizerFilterStep< fptype, fptypeatom > :: OutSuffix; - using CImageResizerFilterStep< fptype, fptypeatom > :: PrefixDC; - using CImageResizerFilterStep< fptype, fptypeatom > :: SuffixDC; - using CImageResizerFilterStep< fptype, fptypeatom > :: RPosBuf; - using CImageResizerFilterStep< fptype, fptypeatom > :: FltBank; - using CImageResizerFilterStep< fptype, fptypeatom > :: EdgePixelCount; - - /** - * Function performs "packing" of a scanline and type conversion. - * Scanline, depending on the "fptype" can be potentially stored as a - * packed SIMD values having a certain atomic type. If required, the sRGB - * gamma correction is applied. - * - * @param ip Input scanline. - * @param op0 Output scanline. - * @param l0 The number of pixels to "pack". 
- */ - - template< class Tin > - void packScanline( const Tin* ip, fptype* const op0, const int l0 ) const - { - const int ElCount = Vars -> ElCount; - const int ElCountIO = Vars -> ElCountIO; - fptype* op = op0; - int l = l0; - - if( !Vars -> UseSRGBGamma ) - { - if( ElCountIO == 1 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = (fptypeatom) ip[ 0 ]; - op += ElCount; - ip++; - l--; - } - } - else - if( ElCountIO == 4 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = (fptypeatom) ip[ 0 ]; - v[ 1 ] = (fptypeatom) ip[ 1 ]; - v[ 2 ] = (fptypeatom) ip[ 2 ]; - v[ 3 ] = (fptypeatom) ip[ 3 ]; - op += ElCount; - ip += 4; - l--; - } - } - else - if( ElCountIO == 3 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = (fptypeatom) ip[ 0 ]; - v[ 1 ] = (fptypeatom) ip[ 1 ]; - v[ 2 ] = (fptypeatom) ip[ 2 ]; - op += ElCount; - ip += 3; - l--; - } - } - else - if( ElCountIO == 2 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = (fptypeatom) ip[ 0 ]; - v[ 1 ] = (fptypeatom) ip[ 1 ]; - op += ElCount; - ip += 2; - l--; - } - } - } - else - { - const fptypeatom gm = (fptypeatom) Vars -> InGammaMult; - - if( ElCountIO == 1 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm ); - op += ElCount; - ip++; - l--; - } - } - else - if( ElCountIO == 4 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm ); - v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm ); - v[ 2 ] = convertSRGB2Lin( (fptypeatom) ip[ 2 ] * gm ); - v[ 3 ] = convertSRGB2Lin( (fptypeatom) ip[ 3 ] * gm ); - op += ElCount; - ip += 4; - l--; - } - } - else - if( ElCountIO == 3 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm ); - v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm ); - v[ 2 ] = convertSRGB2Lin( (fptypeatom) ip[ 2 ] * gm ); 
- op += ElCount; - ip += 3; - l--; - } - } - else - if( ElCountIO == 2 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op; - v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm ); - v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm ); - op += ElCount; - ip += 2; - l--; - } - } - } - - const int ZeroCount = ElCount * Vars -> fppack - ElCountIO; - op = op0; - l = l0; - - if( ZeroCount == 1 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op + ElCountIO; - v[ 0 ] = (fptypeatom) 0; - op += ElCount; - l--; - } - } - else - if( ZeroCount == 2 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op + ElCountIO; - v[ 0 ] = (fptypeatom) 0; - v[ 1 ] = (fptypeatom) 0; - op += ElCount; - l--; - } - } - else - if( ZeroCount == 3 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) op + ElCountIO; - v[ 0 ] = (fptypeatom) 0; - v[ 1 ] = (fptypeatom) 0; - v[ 2 ] = (fptypeatom) 0; - op += ElCount; - l--; - } - } - } - - /** - * Function applies Linear to sRGB gamma correction to the specified - * scanline. - * - * @param p Scanline. - * @param l The number of pixels to de-linearize. - * @param Vars0 Image resizing-related variables. 
- */ - - static void applySRGBGamma( fptype* p, int l, - const CImageResizerVars& Vars0 ) - { - const int ElCount = Vars0.ElCount; - const int ElCountIO = Vars0.ElCountIO; - const fptypeatom gm = (fptypeatom) Vars0.OutGammaMult; - - if( ElCountIO == 1 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) p; - v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm; - p += ElCount; - l--; - } - } - else - if( ElCountIO == 4 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) p; - v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm; - v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm; - v[ 2 ] = convertLin2SRGB( v[ 2 ]) * gm; - v[ 3 ] = convertLin2SRGB( v[ 3 ]) * gm; - p += ElCount; - l--; - } - } - else - if( ElCountIO == 3 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) p; - v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm; - v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm; - v[ 2 ] = convertLin2SRGB( v[ 2 ]) * gm; - p += ElCount; - l--; - } - } - else - if( ElCountIO == 2 ) - { - while( l > 0 ) - { - fptypeatom* v = (fptypeatom*) p; - v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm; - v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm; - p += ElCount; - l--; - } - } - } - - /** - * Function converts vertical scanline to horizontal scanline. This - * function is called by the image resizer when image is resized - * vertically. This means that the vertical scanline is stored in the - * same format produced by the packScanline() and maintained by other - * filtering functions. - * - * @param ip Input vertical scanline. - * @param op Output buffer (temporary buffer used during resizing). - * @param SrcLen The number of pixels in the input scanline, also used to - * calculate input buffer increment. - * @param SrcIncr Input buffer increment to the next vertical pixel. 
- */ - - void convertVtoH( const fptype* ip, fptype* op, const int SrcLen, - const int SrcIncr ) const - { - const int ElCount = Vars -> ElCount; - int j; - - if( ElCount == 1 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - ip += SrcIncr; - op++; - } - } - else - if( ElCount == 4 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - ip += SrcIncr; - op += 4; - } - } - else - if( ElCount == 3 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - ip += SrcIncr; - op += 3; - } - } - else - if( ElCount == 2 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - ip += SrcIncr; - op += 2; - } - } - } - - /** - * Function performs "unpacking" of a scanline and type conversion - * (truncation is used when floating point is converted to integer). - * Scanline, depending on the "fptype" can be potentially stored as a - * packed SIMD values having a certain atomic type. The unpacking function - * assumes that scanline is stored in the style produced by the - * packScanline() function. - * - * @param ip Input scanline. - * @param op Output scanline. - * @param l The number of pixels to "unpack". - * @param Vars0 Image resizing-related variables. 
- */ - - template< class Tout > - static void unpackScanline( const fptype* ip, Tout* op, int l, - const CImageResizerVars& Vars0 ) - { - const int ElCount = Vars0.ElCount; - const int ElCountIO = Vars0.ElCountIO; - /* dprintf(2, "BOOP ElCount=%d ElCountIO=%d l=%d op=%p\n", ElCount, ElCountIO, l, op); */ - - if( ElCountIO == 1 ) - { - while( l > 0 ) - { - const fptypeatom* v = (const fptypeatom*) ip; - op[ 0 ] = (Tout) v[ 0 ]; - ip += ElCount; - op++; - l--; - } - } - else - if( ElCountIO == 4 ) - { - while( l > 0 ) - { - const fptypeatom* v = (const fptypeatom*) ip; - op[ 0 ] = (Tout) v[ 0 ]; - op[ 1 ] = (Tout) v[ 1 ]; - op[ 2 ] = (Tout) v[ 2 ]; - op[ 3 ] = (Tout) v[ 3 ]; - ip += ElCount; - op += 4; - l--; - } - } - else - if( ElCountIO == 3 ) - { - while( l > 0 ) - { - const fptypeatom* v = (const fptypeatom*) ip; - /* DebugBreak(); */ - /* dprintf(2, "BOOP ElCount=%d ElCountIO=%d l=%d op=%p v=%p v[0]=%d\n", ElCount, ElCountIO, l, op, v, (Tout)v[0]); */ - op[ 0 ] = (Tout) v[ 0 ]; - op[ 1 ] = (Tout) v[ 1 ]; - op[ 2 ] = (Tout) v[ 2 ]; - ip += ElCount; - op += 3; - l--; - } - } - else - if( ElCountIO == 2 ) - { - while( l > 0 ) - { - const fptypeatom* v = (const fptypeatom*) ip; - op[ 0 ] = (Tout) v[ 0 ]; - op[ 1 ] = (Tout) v[ 1 ]; - ip += ElCount; - op += 2; - l--; - } - } - } - - /** - * Function prepares input scanline buffer for *this filtering step. - * Left- and right-most pixels are replicated to make sure no buffer - * overrun happens. Such approach also allows to bypass any pointer - * range checks. - * - * @param Src Source buffer. - */ - - void prepareInBuf( fptype* Src ) const - { - if( IsUpsample || InPrefix + InSuffix == 0 ) - { - return; - } - - const int ElCount = Vars -> ElCount; - replicateArray( Src, ElCount, Src - ElCount, InPrefix, -ElCount ); - - Src += ( InLen - 1 ) * ElCount; - replicateArray( Src, ElCount, Src + ElCount, InSuffix, ElCount ); - } - - /** - * Function peforms scanline upsampling with filtering. 
- * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. - */ - - void doUpsample( const fptype* const Src, fptype* const Dst ) const - { - const int ElCount = Vars -> ElCount; - fptype* op0 = &Dst[ -OutPrefix * ElCount ]; - memset( op0, 0, ( OutPrefix + OutLen + OutSuffix ) * ElCount * - sizeof( fptype )); - - const fptype* ip = Src; - const int opstep = ElCount * ResampleFactor; - int l; - - if( FltOrig.getCapacity() > 0 ) - { - // Do not perform filtering, only upsample. - - op0 += ( OutPrefix % ResampleFactor ) * ElCount; - l = OutPrefix / ResampleFactor; - - if( ElCount == 1 ) - { - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while( l >= 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0 += opstep; - l--; - } - } - else - if( ElCount == 4 ) - { - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0[ 2 ] = ip[ 2 ]; - op0[ 3 ] = ip[ 3 ]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0[ 2 ] = ip[ 2 ]; - op0[ 3 ] = ip[ 3 ]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while( l >= 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0[ 2 ] = ip[ 2 ]; - op0[ 3 ] = ip[ 3 ]; - op0 += opstep; - l--; - } - } - else - if( ElCount == 3 ) - { - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0[ 2 ] = ip[ 2 ]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0[ 2 ] = ip[ 2 ]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while( l >= 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0[ 2 ] = ip[ 2 ]; - op0 += opstep; - l--; - } - } - else - if( 
ElCount == 2 ) - { - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0 += opstep; - ip += ElCount; - l--; - } - - l = OutSuffix / ResampleFactor; - - while( l >= 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0[ 1 ] = ip[ 1 ]; - op0 += opstep; - l--; - } - } - - return; - } - - const fptype* const f = Flt; - const int flen = Flt.getCapacity(); - fptype* op; - int i; - - if( ElCount == 1 ) - { - l = InPrefix; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ i ] += f[ i ] * ip[ 0 ]; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ i ] += f[ i ] * ip[ 0 ]; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while( l >= 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ i ] += f[ i ] * ip[ 0 ]; - } - - op0 += opstep; - l--; - } - } - else - if( ElCount == 4 ) - { - l = InPrefix; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op[ 2 ] += f[ i ] * ip[ 2 ]; - op[ 3 ] += f[ i ] * ip[ 3 ]; - op += 4; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op[ 2 ] += f[ i ] * ip[ 2 ]; - op[ 3 ] += f[ i ] * ip[ 3 ]; - op += 4; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while( l >= 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op[ 2 ] += f[ i ] * ip[ 2 ]; - op[ 3 ] += f[ i ] * ip[ 3 ]; - op += 4; - } - - op0 += opstep; - l--; - } - } - else - if( ElCount == 3 ) - { - l = InPrefix; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * 
ip[ 1 ]; - op[ 2 ] += f[ i ] * ip[ 2 ]; - op += 3; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op[ 2 ] += f[ i ] * ip[ 2 ]; - op += 3; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while( l >= 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op[ 2 ] += f[ i ] * ip[ 2 ]; - op += 3; - } - - op0 += opstep; - l--; - } - } - else - if( ElCount == 2 ) - { - l = InPrefix; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op += 2; - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op += 2; - } - - ip += ElCount; - op0 += opstep; - l--; - } - - l = InSuffix; - - while( l >= 0 ) - { - op = op0; - - for( i = 0; i < flen; i++ ) - { - op[ 0 ] += f[ i ] * ip[ 0 ]; - op[ 1 ] += f[ i ] * ip[ 1 ]; - op += 2; - } - - op0 += opstep; - l--; - } - } - - op = op0; - const fptype* dc = SuffixDC; - l = SuffixDC.getCapacity(); - - if( ElCount == 1 ) - { - for( i = 0; i < l; i++ ) - { - op[ i ] += ip[ 0 ] * dc[ i ]; - } - } - else - if( ElCount == 4 ) - { - while( l > 0 ) - { - op[ 0 ] += ip[ 0 ] * dc[ 0 ]; - op[ 1 ] += ip[ 1 ] * dc[ 0 ]; - op[ 2 ] += ip[ 2 ] * dc[ 0 ]; - op[ 3 ] += ip[ 3 ] * dc[ 0 ]; - dc++; - op += 4; - l--; - } - } - else - if( ElCount == 3 ) - { - while( l > 0 ) - { - op[ 0 ] += ip[ 0 ] * dc[ 0 ]; - op[ 1 ] += ip[ 1 ] * dc[ 0 ]; - op[ 2 ] += ip[ 2 ] * dc[ 0 ]; - dc++; - op += 3; - l--; - } - } - else - if( ElCount == 2 ) - { - while( l > 0 ) - { - op[ 0 ] += ip[ 0 ] * dc[ 0 ]; - op[ 1 ] += ip[ 1 ] * dc[ 0 ]; - dc++; - op += 2; - l--; - } - } - - ip = Src; - op = Dst - InPrefix * opstep; - dc = PrefixDC; - l = 
PrefixDC.getCapacity(); - - if( ElCount == 1 ) - { - for( i = 0; i < l; i++ ) - { - op[ i ] += ip[ 0 ] * dc[ i ]; - } - } - else - if( ElCount == 4 ) - { - while( l > 0 ) - { - op[ 0 ] += ip[ 0 ] * dc[ 0 ]; - op[ 1 ] += ip[ 1 ] * dc[ 0 ]; - op[ 2 ] += ip[ 2 ] * dc[ 0 ]; - op[ 3 ] += ip[ 3 ] * dc[ 0 ]; - dc++; - op += 4; - l--; - } - } - else - if( ElCount == 3 ) - { - while( l > 0 ) - { - op[ 0 ] += ip[ 0 ] * dc[ 0 ]; - op[ 1 ] += ip[ 1 ] * dc[ 0 ]; - op[ 2 ] += ip[ 2 ] * dc[ 0 ]; - dc++; - op += 3; - l--; - } - } - else - if( ElCount == 2 ) - { - while( l > 0 ) - { - op[ 0 ] += ip[ 0 ] * dc[ 0 ]; - op[ 1 ] += ip[ 1 ] * dc[ 0 ]; - dc++; - op += 2; - l--; - } - } - } - - /** - * Function peforms scanline filtering with optional downsampling. - * Function makes use of the symmetry of the filter. - * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. - * @param DstIncr Destination scanline buffer increment, used for - * horizontal or vertical scanline stepping. 
- */ - - void doFilter( const fptype* const Src, fptype* Dst, - const int DstIncr ) const - { - const int ElCount = Vars -> ElCount; - const fptype* const f = &Flt[ FltLatency ]; - const int flen = FltLatency + 1; - const int ipstep = ElCount * ResampleFactor; - const fptype* ip = Src - EdgePixelCount * ipstep; - const fptype* ip1; - const fptype* ip2; - int l = OutLen; - int i; - - if( ElCount == 1 ) - { - while( l > 0 ) - { - fptype s = f[ 0 ] * ip[ 0 ]; - ip1 = ip; - ip2 = ip; - - for( i = 1; i < flen; i++ ) - { - ip1++; - ip2--; - s += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]); - } - - Dst[ 0 ] = s; - Dst += DstIncr; - ip += ipstep; - l--; - } - } - else - if( ElCount == 4 ) - { - while( l > 0 ) - { - fptype s1 = f[ 0 ] * ip[ 0 ]; - fptype s2 = f[ 0 ] * ip[ 1 ]; - fptype s3 = f[ 0 ] * ip[ 2 ]; - fptype s4 = f[ 0 ] * ip[ 3 ]; - ip1 = ip; - ip2 = ip; - - for( i = 1; i < flen; i++ ) - { - ip1 += 4; - ip2 -= 4; - s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]); - s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]); - s3 += f[ i ] * ( ip1[ 2 ] + ip2[ 2 ]); - s4 += f[ i ] * ( ip1[ 3 ] + ip2[ 3 ]); - } - - Dst[ 0 ] = s1; - Dst[ 1 ] = s2; - Dst[ 2 ] = s3; - Dst[ 3 ] = s4; - Dst += DstIncr; - ip += ipstep; - l--; - } - } - else - if( ElCount == 3 ) - { - while( l > 0 ) - { - fptype s1 = f[ 0 ] * ip[ 0 ]; - fptype s2 = f[ 0 ] * ip[ 1 ]; - fptype s3 = f[ 0 ] * ip[ 2 ]; - ip1 = ip; - ip2 = ip; - - for( i = 1; i < flen; i++ ) - { - ip1 += 3; - ip2 -= 3; - s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]); - s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]); - s3 += f[ i ] * ( ip1[ 2 ] + ip2[ 2 ]); - } - - Dst[ 0 ] = s1; - Dst[ 1 ] = s2; - Dst[ 2 ] = s3; - Dst += DstIncr; - ip += ipstep; - l--; - } - } - else - if( ElCount == 2 ) - { - while( l > 0 ) - { - fptype s1 = f[ 0 ] * ip[ 0 ]; - fptype s2 = f[ 0 ] * ip[ 1 ]; - ip1 = ip; - ip2 = ip; - - for( i = 1; i < flen; i++ ) - { - ip1 += 2; - ip2 -= 2; - s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]); - s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]); - } - - Dst[ 0 ] = s1; - Dst[ 1 ] = s2; - Dst += 
DstIncr; - ip += ipstep; - l--; - } - } - } - - /** - * Function performs resizing of a single scanline. This function does - * not "know" about the length of the source scanline buffer. This buffer - * should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is - * always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 ) - * does not exceed source scanline's buffer length. SrcLine's increment is - * assumed to be equal to ElCount. - * - * @param SrcLine Source scanline buffer. - * @param DstLine Destination (resized) scanline buffer. - * @param DstLineIncr Destination scanline position increment, used for - * horizontal or vertical scanline stepping. - * @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be - * aligned by fpclass :: fpalign. - */ - - void doResize( const fptype* SrcLine, fptype* DstLine, - const int DstLineIncr, fptype* const ) const - { - const int IntFltLen = FltBank -> getFilterLen(); - const int ElCount = Vars -> ElCount; - const typename CImageResizerFilterStep< fptype, fptypeatom > :: - CResizePos* rpos = &(*RPosBuf)[ 0 ]; - - const typename CImageResizerFilterStep< fptype, fptypeatom > :: - CResizePos* const rpose = rpos + OutLen; - -#define AVIR_RESIZE_PART1 \ - while( rpos < rpose ) \ - { \ - const fptype x = (fptype) rpos -> x; \ - const fptype* const ftp = rpos -> ftp; \ - const fptype* const ftp2 = ftp + IntFltLen; \ - const fptype* Src = SrcLine + rpos -> SrcOffs; \ - int i; - -#define AVIR_RESIZE_PART1nx \ - while( rpos < rpose ) \ - { \ - const fptype* const ftp = rpos -> ftp; \ - const fptype* Src = SrcLine + rpos -> SrcOffs; \ - int i; - -#define AVIR_RESIZE_PART2 \ - DstLine += DstLineIncr; \ - rpos++; \ - } - - if( FltBank -> getOrder() == 1 ) - { - if( ElCount == 1 ) - { - AVIR_RESIZE_PART1 - - fptype sum = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - sum += ( ftp[ i ] + ftp2[ i ] * x ) * Src[ i ]; - } - - DstLine[ 0 ] = sum; - - AVIR_RESIZE_PART2 - } - else - if( ElCount == 4 ) - { 
- AVIR_RESIZE_PART1 - - fptype sum[ 4 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - sum[ 2 ] = 0.0; - sum[ 3 ] = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - const fptype xx = ftp[ i ] + ftp2[ i ] * x; - sum[ 0 ] += xx * Src[ 0 ]; - sum[ 1 ] += xx * Src[ 1 ]; - sum[ 2 ] += xx * Src[ 2 ]; - sum[ 3 ] += xx * Src[ 3 ]; - Src += 4; - } - - DstLine[ 0 ] = sum[ 0 ]; - DstLine[ 1 ] = sum[ 1 ]; - DstLine[ 2 ] = sum[ 2 ]; - DstLine[ 3 ] = sum[ 3 ]; - - AVIR_RESIZE_PART2 - } - else - if( ElCount == 3 ) - { - AVIR_RESIZE_PART1 - - fptype sum[ 3 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - sum[ 2 ] = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - const fptype xx = ftp[ i ] + ftp2[ i ] * x; - sum[ 0 ] += xx * Src[ 0 ]; - sum[ 1 ] += xx * Src[ 1 ]; - sum[ 2 ] += xx * Src[ 2 ]; - Src += 3; - } - - DstLine[ 0 ] = sum[ 0 ]; - DstLine[ 1 ] = sum[ 1 ]; - DstLine[ 2 ] = sum[ 2 ]; - - AVIR_RESIZE_PART2 - } - else - if( ElCount == 2 ) - { - AVIR_RESIZE_PART1 - - fptype sum[ 2 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - const fptype xx = ftp[ i ] + ftp2[ i ] * x; - sum[ 0 ] += xx * Src[ 0 ]; - sum[ 1 ] += xx * Src[ 1 ]; - Src += 2; - } - - DstLine[ 0 ] = sum[ 0 ]; - DstLine[ 1 ] = sum[ 1 ]; - - AVIR_RESIZE_PART2 - } - } - else - { - if( ElCount == 1 ) - { - AVIR_RESIZE_PART1nx - - fptype sum = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - sum += ftp[ i ] * Src[ i ]; - } - - DstLine[ 0 ] = sum; - - AVIR_RESIZE_PART2 - } - else - if( ElCount == 4 ) - { - AVIR_RESIZE_PART1nx - - fptype sum[ 4 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - sum[ 2 ] = 0.0; - sum[ 3 ] = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - const fptype xx = ftp[ i ]; - sum[ 0 ] += xx * Src[ 0 ]; - sum[ 1 ] += xx * Src[ 1 ]; - sum[ 2 ] += xx * Src[ 2 ]; - sum[ 3 ] += xx * Src[ 3 ]; - Src += 4; - } - - DstLine[ 0 ] = sum[ 0 ]; - DstLine[ 1 ] = sum[ 1 ]; - DstLine[ 2 ] = sum[ 2 ]; - DstLine[ 3 ] = sum[ 3 ]; - - AVIR_RESIZE_PART2 - } - else - if( ElCount == 3 ) - { - AVIR_RESIZE_PART1nx - - 
fptype sum[ 3 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - sum[ 2 ] = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - const fptype xx = ftp[ i ]; - sum[ 0 ] += xx * Src[ 0 ]; - sum[ 1 ] += xx * Src[ 1 ]; - sum[ 2 ] += xx * Src[ 2 ]; - Src += 3; - } - - DstLine[ 0 ] = sum[ 0 ]; - DstLine[ 1 ] = sum[ 1 ]; - DstLine[ 2 ] = sum[ 2 ]; - - AVIR_RESIZE_PART2 - } - else - if( ElCount == 2 ) - { - AVIR_RESIZE_PART1nx - - fptype sum[ 2 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - - for( i = 0; i < IntFltLen; i++ ) - { - const fptype xx = ftp[ i ]; - sum[ 0 ] += xx * Src[ 0 ]; - sum[ 1 ] += xx * Src[ 1 ]; - Src += 2; - } - - DstLine[ 0 ] = sum[ 0 ]; - DstLine[ 1 ] = sum[ 1 ]; - - AVIR_RESIZE_PART2 - } - } - } -#undef AVIR_RESIZE_PART2 -#undef AVIR_RESIZE_PART1nx -#undef AVIR_RESIZE_PART1 -}; - -/** - * @brief Image resizer's default dithering class. - * - * This class defines an object that performs rounding, clipping and dithering - * operations over horizontal scanline pixels before scanline is stored in the - * output buffer. - * - * The ditherer should expect the same storage order of the pixels in a - * scanline as used in the "filtering step" class. So, a separate ditherer - * class should be defined for each scanline pixel storage style. The default - * ditherer implements a simple rounding without dithering: it can be used for - * an efficient dithering method which can be multi-threaded. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types can be used. - */ - -template< class fptype > -class CImageResizerDithererDefINL -{ -public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. 
- */ - - void init( const int aLen, const CImageResizerVars& aVars, - const double aTrMul, const double aPkOut ) - { - Len = aLen; - Vars = &aVars; - LenE = aLen * Vars -> ElCount; - TrMul0 = aTrMul; - PkOut0 = aPkOut; - } - - /** - * @return "True" if dithering is recursive relative to scanlines meaning - * multi-threaded execution is not supported by this dithering method. - */ - - static bool isRecursive() - { - return( false ); - } - - /** - * Function performs rounding and clipping operations. - * - * @param ResScanline The buffer containing the final scanline. - */ - - void dither( fptype* const ResScanline ) const - { - const fptype c0 = 0.0; - const fptype PkOut = (fptype) PkOut0; - int j; - - if( TrMul0 == 1.0 ) - { - // Optimization - do not perform bit depth truncation. - - for( j = 0; j < LenE; j++ ) - { - ResScanline[ j ] = clamp( round( ResScanline[ j ]), c0, - PkOut ); - } - } - else - { - const fptype TrMul = (fptype) TrMul0; - - for( j = 0; j < LenE; j++ ) - { - const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul; - ResScanline[ j ] = clamp( z0, c0, PkOut ); - } - } - } - -protected: - int Len; ///< Scanline's length in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int LenE; ///< = LenE * ElCount. - ///< - double TrMul0; ///< Bit-depth truncation multiplier. - ///< - double PkOut0; ///< Peak output value allowed. - ///< -}; - -/** - * @brief Image resizer's error-diffusion dithering class, interleaved mode. - * - * This ditherer implements error-diffusion dithering which looks good, and - * whose results are compressed by PNG well. This implementation uses - * weighting coefficients obtained via machine optimization and visual - * evaluation. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types can be used. 
- */ - -template< class fptype > -class CImageResizerDithererErrdINL : - public CImageResizerDithererDefINL< fptype > -{ -public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. - */ - - void init( const int aLen, const CImageResizerVars& aVars, - const double aTrMul, const double aPkOut ) - { - CImageResizerDithererDefINL< fptype > :: init( aLen, aVars, aTrMul, - aPkOut ); - - ResScanlineDith0.alloc( LenE + Vars -> ElCount, sizeof( fptype )); - ResScanlineDith = ResScanlineDith0 + Vars -> ElCount; - int i; - - for( i = 0; i < LenE + Vars -> ElCount; i++ ) - { - ResScanlineDith0[ i ] = 0.0; - } - } - - static bool isRecursive() - { - return( true ); - } - - void dither( fptype* const ResScanline ) - { - const int ElCount = Vars -> ElCount; - const fptype c0 = 0.0; - const fptype TrMul = (fptype) TrMul0; - const fptype PkOut = (fptype) PkOut0; - int j; - - for( j = 0; j < LenE; j++ ) - { - ResScanline[ j ] += ResScanlineDith[ j ]; - ResScanlineDith[ j ] = 0.0; - } - - for( j = 0; j < LenE - ElCount; j++ ) - { - // Perform rounding, noise estimation and saturation. 
- - const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul; - const fptype Noise = ResScanline[ j ] - z0; - ResScanline[ j ] = clamp( z0, c0, PkOut ); - - ResScanline[ j + ElCount ] += Noise * (fptype) 0.364842; - ResScanlineDith[ j - ElCount ] += Noise * (fptype) 0.207305; - ResScanlineDith[ j ] += Noise * (fptype) 0.364842; - ResScanlineDith[ j + ElCount ] += Noise * (fptype) 0.063011; - } - - while( j < LenE ) - { - const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul; - const fptype Noise = ResScanline[ j ] - z0; - ResScanline[ j ] = clamp( z0, c0, PkOut ); - - ResScanlineDith[ j - ElCount ] += Noise * (fptype) 0.207305; - ResScanlineDith[ j ] += Noise * (fptype) 0.364842; - j++; - } - } - -protected: - using CImageResizerDithererDefINL< fptype > :: Len; - using CImageResizerDithererDefINL< fptype > :: Vars; - using CImageResizerDithererDefINL< fptype > :: LenE; - using CImageResizerDithererDefINL< fptype > :: TrMul0; - using CImageResizerDithererDefINL< fptype > :: PkOut0; - - CBuffer< fptype > ResScanlineDith0; ///< Error diffusion buffer. - ///< - fptype* ResScanlineDith; ///< Error diffusion buffer pointer which skips - ///< the first ElCount elements. - ///< -}; - -/** - * @brief Floating-point processing definition and abstraction class. - * - * This class defines several constants and typedefs that point to classes - * that should be used by the image resizing algorithm. Such "definition - * class" can be used to define alternative scanline processing algorithms - * (e.g. SIMD) and image scanline packing styles used during processing. This - * class also offers an abstraction layer for dithering, rounding and - * clamping (saturation) operation. - * - * The fpclass_def class can be used to define processing using both SIMD and - * non-SIMD types, but using algorithms that are operate on interleaved pixels - * and non-SIMD optimized themselves. - * - * @tparam afptype Floating point type to use for storing intermediate data - * and variables. 
For variables that are not used in intensive calculations - * the "double" type is always used. On the latest Intel processors (like - * i7-4770K) there is almost no performance difference between "double" and - * "float". Image quality differences between "double" and "float" are not - * apparent on 8-bit images. At the same time the "float" uses half amount of - * working memory the "double" type uses. SIMD types can be used. The - * functions round() and clamp() in the "avir" or other visible namespace - * should be available for the specified type. SIMD types allow to perform - * resizing of images with more than 4 channels, to be exact 4 * SIMD element - * number (e.g. 16 for float4), without modification of the image resizing - * algorithm required. - * @tparam afptypeatom The atomic type the "afptype" consists of. - * @tparam adith Ditherer class to use during processing. - */ - -template< class afptype, class afptypeatom = afptype, - class adith = CImageResizerDithererDefINL< afptype > > -class fpclass_def -{ -public: - typedef afptype fptype; ///< Floating-point type to use during processing. - ///< - typedef afptypeatom fptypeatom; ///< Atomic type "fptype" consists of. - ///< - static const int fppack = sizeof( fptype ) / sizeof( fptypeatom ); ///< - ///< The number of atomic types stored in a single "fptype" element. - ///< - static const int fpalign = sizeof( fptype ); ///< Suggested alignment size - ///< in bytes. This is not a required alignment, because image - ///< resizing algorithm cannot be made to have a strictly aligned data - ///< access at all steps (e.g. interpolation cannot perform aligned - ///< accesses). - ///< - static const int elalign = 1; ///< Length alignment of arrays of elements. - ///< This applies to filters and intermediate buffers: this constant - ///< forces filters and scanlines to have a length which is a multiple - ///< of this value, for more efficient SIMD implementation. 
- ///< - static const int packmode = 0; ///< 0 if interleaved packing, 1 if - ///< de-interleaved. - ///< - typedef CImageResizerFilterStepINL< fptype, fptypeatom > CFilterStep; ///< - ///< Filtering step class to use during processing. - ///< - typedef adith CDitherer; ///< Ditherer class to use during processing. - ///< -}; - -/** - * @brief Image resizer class. - * - * The object of this class can be used to resize 1-4 channel images to any - * required size. Resizing is performed by utilizing interpolated sinc - * fractional delay filters plus (if necessary) a cascade of built-in - * sinc function-based 2X upsampling or 2X downsampling stages, followed by a - * correction filtering. - * - * Object of this class can be allocated on stack. - * - * @tparam fpclass Floating-point processing definition class to use. See - * avir::fpclass_def for more details. - */ - -template< class fpclass = fpclass_def< float > > -class CImageResizer -{ -public: - /** - * Constructor initializes the resizer. - * - * @param aResBitDepth Required bit depth of resulting image (1-16). If - * integer value output is used (e.g. uint8_t), the bit depth also affects - * rounding: for example, if aResBitDepth=6 and "Tout" is uint8_t, the - * result will be rounded to 6 most significant bits (2 least significant - * bits truncated, with dithering applied). - * @param aSrcBitDepth Source image's real bit-depth. Set to 0 to use - * aResBitDepth. - * @param aParams Resizing algorithm's parameters to use. Leave out for - * default values. Can be useful when performing automatic optimization of - * parameters. - */ - - CImageResizer( const int aResBitDepth = 8, const int aSrcBitDepth = 0, - const CImageResizerParams& aParams = CImageResizerParamsDef() ) - : Params( aParams ) - , ResBitDepth( aResBitDepth ) - { - SrcBitDepth = ( aSrcBitDepth == 0 ? 
ResBitDepth : aSrcBitDepth ); - - initFilterBank( FixedFilterBank, 1.0, false, CFltBuffer() ); - FixedFilterBank.createAllFilters(); - } - - /** - * Function resizes image. - * - * @param SrcBuf Source image buffer. - * @param SrcWidth Source image width. - * @param SrcHeight Source image height. - * @param SrcScanlineSize Physical size of source scanline in elements - * (not bytes). If this value is below 1, SrcWidth * ElCountIO will be - * used as the physical source scanline size. - * @param[out] NewBuf Buffer to accept the resized image. Can be equal to - * SrcBuf if the size of the resized image is smaller or equal to source - * image in size. - * @param NewWidth New image width. - * @param NewHeight New image height. - * @param ElCountIO The number of elements (channels) used to store each - * source and destination pixel (1-4). - * @param k Resizing step (one output pixel corresponds to "k" input - * pixels). A downsizing factor if > 1.0; upsizing factor if <= 1.0. - * Multiply by -1 if you would like to bypass "ox" and "oy" adjustment - * which is done by default to produce a centered image. If step value - * equals 0, the step value will be chosen automatically and independently - * for horizontal and vertical resizing. - * @param[in,out] aVars Pointer to variables structure to be passed to the - * image resizing function. Can be NULL. Only variables that are - * initialized in default constructor of this structure are accepted by - * this function. These variables will not be changed by this function. - * All other variables can be modified by this function. The access to - * this object is not thread-safe, each concurrent instance of this - * function should use a separate aVars object. - * @tparam Tin Input buffer element's type. Can be uint8_t (0-255 value - * range), uint16_t (0-65535 value range), float (0.0-1.0 value range), - * double (0.0-1.0 value range). Larger integer types are treated as - * uint16_t. Signed integer types are unsupported. 
- * @tparam Tout Output buffer element's type. Can be uint8_t (0-255 value - * range), uint16_t (0-65535 value range), float (0.0-1.0 value range), - * double (0.0-1.0 value range). Larger integer types are treated as - * uint16_t. Signed integer types are unsupported. - */ - - template< class Tin, class Tout > - void resizeImage( const Tin* const SrcBuf, const int SrcWidth, - const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf, - const int NewWidth, const int NewHeight, const int ElCountIO, - const double k, CImageResizerVars* const aVars = NULL ) const - { - if( SrcWidth == 0 || SrcHeight == 0 ) - { - memset( NewBuf, 0, (size_t) NewWidth * NewHeight * - sizeof( Tout )); - - return; - } - else - if( NewWidth == 0 || NewHeight == 0 ) - { - return; - } - - CImageResizerVars DefVars; - CImageResizerVars& Vars = ( aVars == NULL ? DefVars : *aVars ); - - CImageResizerThreadPool DefThreadPool; - CImageResizerThreadPool& ThreadPool = ( Vars.ThreadPool == NULL ? - DefThreadPool : *Vars.ThreadPool ); - - // Define resizing steps, also optionally modify offsets so that - // resizing produces a "centered" image. - - double kx; - double ky; - double ox = Vars.ox; - double oy = Vars.oy; - - if( k == 0.0 ) - { - if( NewWidth > SrcWidth ) - { - kx = (double) ( SrcWidth - 1 ) / ( NewWidth - 1 ); - } - else - { - kx = (double) SrcWidth / NewWidth; - ox += ( kx - 1.0 ) * 0.5; - } - - if( NewHeight > SrcHeight ) - { - ky = (double) ( SrcHeight - 1 ) / ( NewHeight - 1 ); - } - else - { - ky = (double) SrcHeight / NewHeight; - oy += ( ky - 1.0 ) * 0.5; - } - } - else - if( k > 0.0 ) - { - kx = k; - ky = k; - - if( k > 1.0 ) - { - const double ko = ( k - 1.0 ) * 0.5; - ox += ko; - oy += ko; - } - } - else - { - kx = -k; - ky = -k; - } - - // Evaluate pre-multipliers used on the output stage. - - const bool IsInFloat = ( (Tin) 0.4 != 0 ); - const bool IsOutFloat = ( (Tout) 0.4 != 0 ); - double OutMul; // Output multiplier. 
- - if( Vars.UseSRGBGamma ) - { - if( IsInFloat ) - { - Vars.InGammaMult = 1.0; - } - else - { - Vars.InGammaMult = - 1.0 / ( sizeof( Tin ) == 1 ? 255.0 : 65535.0 ); - } - - if( IsOutFloat ) - { - Vars.OutGammaMult = 1.0; - } - else - { - Vars.OutGammaMult = ( sizeof( Tout ) == 1 ? 255.0 : 65535.0 ); - } - - OutMul = 1.0; - } - else - { - if( IsOutFloat ) - { - OutMul = 1.0; - } - else - { - OutMul = ( sizeof( Tout ) == 1 ? 255.0 : 65535.0 ); - } - - if( !IsInFloat ) - { - OutMul /= ( sizeof( Tin ) == 1 ? 255.0 : 65535.0 ); - } - } - - // Fill widely-used variables. - - const int ElCount = ( ElCountIO + fpclass :: fppack - 1 ) / - fpclass :: fppack; - - const int NewWidthE = NewWidth * ElCount; - - if( SrcScanlineSize < 1 ) - { - SrcScanlineSize = SrcWidth * ElCountIO; - } - - Vars.ElCount = ElCount; - Vars.ElCountIO = ElCountIO; - Vars.fppack = fpclass :: fppack; - Vars.fpalign = fpclass :: fpalign; - Vars.elalign = fpclass :: elalign; - Vars.packmode = fpclass :: packmode; - - // Horizontal scanline filtering and resizing. - - CDSPFracFilterBankLin< fptype > FltBank; - CFilterSteps FltSteps; - typename CFilterStep :: CRPosBufArray RPosBufArray; - CBuffer< uint8_t > UsedFracMap; - - // Perform the filtering steps modeling at various modes, find the - // most efficient mode for both horizontal and vertical resizing. - - int UseBuildMode = 1; - const int BuildModeCount = - ( FixedFilterBank.getOrder() == 0 ? 
4 : 2 ); - - int m; - - if( Vars.BuildMode >= 0 ) - { - UseBuildMode = Vars.BuildMode; - } - else - { - int BestScore = 0x7FFFFFFF; - - for( m = 0; m < BuildModeCount; m++ ) - { - CDSPFracFilterBankLin< fptype > TmpBank; - CFilterSteps TmpSteps; - Vars.k = kx; - Vars.o = ox; - buildFilterSteps( TmpSteps, Vars, TmpBank, OutMul, m, true ); - updateFilterStepBuffers( TmpSteps, Vars, RPosBufArray, - SrcWidth, NewWidth ); - - fillUsedFracMap( TmpSteps[ Vars.ResizeStep ], UsedFracMap ); - const int c = calcComplexity( TmpSteps, Vars, UsedFracMap, - SrcHeight ); - - if( c < BestScore ) - { - UseBuildMode = m; - BestScore = c; - } - } - } - - // Perform the actual filtering steps building. - - Vars.k = kx; - Vars.o = ox; - buildFilterSteps( FltSteps, Vars, FltBank, OutMul, UseBuildMode, - false ); - - updateFilterStepBuffers( FltSteps, Vars, RPosBufArray, SrcWidth, - NewWidth ); - - updateBufLenAndRPosPtrs( FltSteps, Vars, NewWidth ); - - const int ThreadCount = ThreadPool.getSuggestedWorkloadCount(); - // Includes the current thread. - - CStructArray< CThreadData< Tin, Tout > > td; - td.setItemCount( ThreadCount ); - int i; - - for( i = 0; i < ThreadCount; i++ ) - { - if( i > 0 ) - { - ThreadPool.addWorkload( &td[ i ]); - } - - td[ i ].init( i, ThreadCount, FltSteps, Vars ); - - td[ i ].initScanlineQueue( td[ i ].sopResizeH, SrcHeight, - SrcWidth ); - } - - CBuffer< fptype, size_t > FltBuf( (size_t) NewWidthE * SrcHeight, - fpclass :: fpalign ); // Temporary buffer that receives - // horizontally-filtered and resized image. - - for( i = 0; i < SrcHeight; i++ ) - { - td[ i % ThreadCount ].addScanlineToQueue( - (void*) &SrcBuf[ (size_t) i * SrcScanlineSize ], - &FltBuf[ (size_t) i * NewWidthE ]); - } - - ThreadPool.startAllWorkloads(); - td[ 0 ].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - - // Vertical scanline filtering and resizing, reuse previously defined - // filtering steps if possible. 
- - const int PrevUseBuildMode = UseBuildMode; - - if( Vars.BuildMode >= 0 ) - { - UseBuildMode = Vars.BuildMode; - } - else - { - CImageResizerVars TmpVars( Vars ); - int BestScore = 0x7FFFFFFF; - - for( m = 0; m < BuildModeCount; m++ ) - { - CDSPFracFilterBankLin< fptype > TmpBank; - TmpBank.copyInitParams( FltBank ); - CFilterSteps TmpSteps; - TmpVars.k = ky; - TmpVars.o = oy; - buildFilterSteps( TmpSteps, TmpVars, TmpBank, 1.0, m, true ); - updateFilterStepBuffers( TmpSteps, TmpVars, RPosBufArray, - SrcHeight, NewHeight ); - - fillUsedFracMap( TmpSteps[ TmpVars.ResizeStep ], - UsedFracMap ); - - const int c = calcComplexity( TmpSteps, TmpVars, UsedFracMap, - NewWidth ); - - if( c < BestScore ) - { - UseBuildMode = m; - BestScore = c; - } - } - } - - Vars.k = ky; - Vars.o = oy; - - if( UseBuildMode == PrevUseBuildMode && ky == kx ) - { - if( OutMul != 1.0 ) - { - modifyCorrFilterDCGain( FltSteps, 1.0 / OutMul ); - } - } - else - { - buildFilterSteps( FltSteps, Vars, FltBank, 1.0, UseBuildMode, - false ); - } - - updateFilterStepBuffers( FltSteps, Vars, RPosBufArray, SrcHeight, - NewHeight ); - - updateBufLenAndRPosPtrs( FltSteps, Vars, NewWidth ); - - if( IsOutFloat && sizeof( FltBuf[ 0 ]) == sizeof( Tout ) && - fpclass :: packmode == 0 ) - { - // In-place output. 
- - for( i = 0; i < ThreadCount; i++ ) - { - td[ i ].initScanlineQueue( td[ i ].sopResizeV, NewWidth, - SrcHeight, NewWidthE, NewWidthE ); - } - - for( i = 0; i < NewWidth; i++ ) - { - td[ i % ThreadCount ].addScanlineToQueue( - &FltBuf[ (size_t) i * ElCount ], - (fptype*) &NewBuf[ (size_t) i * ElCount ]); - } - - ThreadPool.startAllWorkloads(); - td[ 0 ].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - ThreadPool.removeAllWorkloads(); - - return; - } - - CBuffer< fptype, size_t > ResBuf( (size_t) NewWidthE * NewHeight, - fpclass :: fpalign ); - - for( i = 0; i < ThreadCount; i++ ) - { - td[ i ].initScanlineQueue( td[ i ].sopResizeV, NewWidth, - SrcHeight, NewWidthE, NewWidthE ); - } - - const int im = ( fpclass :: packmode == 0 ? ElCount : 1 ); - - for( i = 0; i < NewWidth; i++ ) - { - td[ i % ThreadCount ].addScanlineToQueue( - &FltBuf[ (size_t) i * im ], &ResBuf[ (size_t) i * im ]); - } - - ThreadPool.startAllWorkloads(); - td[ 0 ].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - - if( IsOutFloat ) - { - // Perform output, but skip dithering. - - for( i = 0; i < ThreadCount; i++ ) - { - td[ i ].initScanlineQueue( td[ i ].sopUnpackH, - NewHeight, NewWidth ); - } - - for( i = 0; i < NewHeight; i++ ) - { - td[ i % ThreadCount ].addScanlineToQueue( - &ResBuf[ (size_t) i * NewWidthE ], - &NewBuf[ (size_t) i * NewWidth * ElCountIO ]); - } - - ThreadPool.startAllWorkloads(); - td[ 0 ].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - ThreadPool.removeAllWorkloads(); - - return; - } - - // Perform output with dithering (for integer output only). - - int TruncBits; // The number of lower bits to truncate and dither. - int OutRange; // Output range. - - if( sizeof( Tout ) == 1 ) - { - TruncBits = 8 - ResBitDepth; - OutRange = 255; - } - else - { - TruncBits = 16 - ResBitDepth; - OutRange = 65535; - } - - const double PkOut = OutRange; - const double TrMul = ( TruncBits > 0 ? 
- PkOut / ( OutRange >> TruncBits ) : 1.0 ); - - if( CDitherer :: isRecursive() ) - { - td[ 0 ].getDitherer().init( NewWidth, Vars, TrMul, PkOut ); - - if( Vars.UseSRGBGamma ) - { - for( i = 0; i < NewHeight; i++ ) - { - fptype* const ResScanline = - &ResBuf[ (size_t) i * NewWidthE ]; - - CFilterStep :: applySRGBGamma( ResScanline, NewWidth, - Vars ); - - td[ 0 ].getDitherer().dither( ResScanline ); - - CFilterStep :: unpackScanline( ResScanline, - &NewBuf[ (size_t) i * NewWidth * ElCountIO ], - NewWidth, Vars ); - } - } - else - { - for( i = 0; i < NewHeight; i++ ) - { - fptype* const ResScanline = - &ResBuf[ (size_t) i * NewWidthE ]; - - td[ 0 ].getDitherer().dither( ResScanline ); - - CFilterStep :: unpackScanline( ResScanline, - &NewBuf[ (size_t) i * NewWidth * ElCountIO ], - NewWidth, Vars ); - } - } - } - else - { - for( i = 0; i < ThreadCount; i++ ) - { - td[ i ].initScanlineQueue( td[ i ].sopDitherAndUnpackH, - NewHeight, NewWidth ); - - td[ i ].getDitherer().init( NewWidth, Vars, TrMul, PkOut ); - } - - for( i = 0; i < NewHeight; i++ ) - { - td[ i % ThreadCount ].addScanlineToQueue( - &ResBuf[ (size_t) i * NewWidthE ], - &NewBuf[ (size_t) i * NewWidth * ElCountIO ]); - } - - ThreadPool.startAllWorkloads(); - td[ 0 ].processScanlineQueue(); - ThreadPool.waitAllWorkloadsToFinish(); - } - - ThreadPool.removeAllWorkloads(); - } - -private: - typedef typename fpclass :: fptype fptype; ///< Floating-point type to use - ///< during processing. - ///< - typedef typename fpclass :: CFilterStep CFilterStep; ///< Filtering step - ///< class to use during processing. - ///< - typedef typename fpclass :: CDitherer CDitherer; ///< Ditherer class to - ///< use during processing. - ///< - CImageResizerParams Params; ///< Algorithm's parameters currently in use. - ///< - int SrcBitDepth; ///< Bit resolution of the source image. - ///< - int ResBitDepth; ///< Bit resolution of the resulting image. 
- ///< - CDSPFracFilterBankLin< fptype > FixedFilterBank; ///< Fractional delay - ///< filter bank with fixed characteristics, mainly for upsizing - ///< cases. - ///< - - /** - * @brief Filtering steps array. - * - * The object of this class stores filtering steps together. - */ - - typedef CStructArray< CFilterStep > CFilterSteps; - - /** - * Function initializes the filter bank in the specified resizing step - * according to the source and resulting image bit depths. - * - * @param FltBank Filter bank to initialize. - * @param CutoffMult Cutoff multiplier, 0 to 1. 1 corresponds to 0.5pi - * cutoff point. - * @param ForceHiOrder "True" if a high-order interpolation should be - * forced which requires considerably less resources for initialization. - * @param ExtFilter External filter to apply to interpolation filter. - */ - - void initFilterBank( CDSPFracFilterBankLin< fptype >& FltBank, - const double CutoffMult, const bool ForceHiOrder, - const CFltBuffer& ExtFilter ) const - { - const int IntBitDepth = ( ResBitDepth > SrcBitDepth ? ResBitDepth : - SrcBitDepth ); - - const double SNR = -6.02 * ( IntBitDepth + 3 ); - int UseOrder; - int FracCount; // The number of fractional delay filters sampled by - // the filter bank. This variable affects the signal-to-noise - // ratio at interpolation stage. Theoretically, at UseOrder==1, - // 8-bit image resizing requires 66.2 dB SNR or 11. 16-bit - // resizing requires 114.4 dB SNR or 150. At UseOrder=0 the - // required number of filters is exponentially higher. 
- - if( ForceHiOrder || IntBitDepth > 8 ) - { - UseOrder = 1; // -146 dB max - FracCount = (int) ceil( 0.23134052 * exp( -0.058062929 * SNR )); - } - else - { - UseOrder = 0; // -72 dB max - FracCount = (int) ceil( 0.33287686 * exp( -0.11334583 * SNR )); - } - - if( FracCount < 2 ) - { - FracCount = 2; - } - - FltBank.init( FracCount, UseOrder, Params.IntFltLen / CutoffMult, - Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha, ExtFilter, - fpclass :: fpalign, fpclass :: elalign ); - } - - /** - * Function allocates filter buffer taking "fpclass" alignments into - * account. The allocated buffer may be larger than the requested size: in - * this case the additional elements will be zeroed by this function. - * - * @param Flt Filter buffer. - * @param ReqCapacity The required filter buffer's capacity. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter allocation. - * @param FltExt If non-NULL this variable will receive the number of - * elements the filter was extended by. - */ - - static void allocFilter( CBuffer< fptype >& Flt, const int ReqCapacity, - const bool IsModel = false, int* const FltExt = NULL ) - { - int UseCapacity = ( ReqCapacity + fpclass :: elalign - 1 ) & - ~( fpclass :: elalign - 1 ); - - int Ext = UseCapacity - ReqCapacity; - - if( FltExt != NULL ) - { - *FltExt = Ext; - } - - if( IsModel ) - { - Flt.forceCapacity( UseCapacity ); - return; - } - - Flt.alloc( UseCapacity, fpclass :: fpalign ); - - while( Ext > 0 ) - { - Ext--; - Flt[ ReqCapacity + Ext ] = 0.0; - } - } - - /** - * Function assigns filter parameters to the specified filtering step - * object. - * - * @param fs Filtering step to assign parameter to. This step cannot be - * the last step if ResampleFactor greater than 1 was specified. - * @param IsUpsample "True" if upsampling step. Should be set to "false" - * if FltCutoff is negative. - * @param ResampleFactor Resampling factor of this filter (>=1). 
- * @param FltCutoff Filter cutoff point. This value will be divided by the - * ResampleFactor if IsUpsample equals "true". If zero value was - * specified, the "half-band" predefined filter will be created. In this - * case the ResampleFactor will modify the filter cutoff point. - * @param DCGain DC gain to apply to the filter. Assigned to filtering - * step's DCGain variable. - * @param UseFltOrig "True" if the originally-designed filter should be - * left in filtering step's FltOrig buffer. Otherwise it will be freed. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - void assignFilterParams( CFilterStep& fs, const bool IsUpsample, - const int ResampleFactor, const double FltCutoff, const double DCGain, - const bool UseFltOrig, const bool IsModel ) const - { - double FltAlpha; - double Len2; - double Freq; - - if( FltCutoff == 0.0 ) - { - const double m = 2.0 / ResampleFactor; - FltAlpha = Params.HBFltAlpha; - Len2 = 0.5 * Params.HBFltLen / m; - Freq = AVIR_PI * Params.HBFltCutoff * m; - } - else - { - FltAlpha = Params.LPFltAlpha; - Len2 = 0.25 * Params.LPFltBaseLen / FltCutoff; - Freq = AVIR_PI * Params.LPFltCutoffMult * FltCutoff; - } - - if( IsUpsample ) - { - Len2 *= ResampleFactor; - Freq /= ResampleFactor; - fs.DCGain = DCGain * ResampleFactor; - } - else - { - fs.DCGain = DCGain; - } - - fs.FltOrig.Len2 = Len2; - fs.FltOrig.Freq = Freq; - fs.FltOrig.Alpha = FltAlpha; - fs.FltOrig.DCGain = fs.DCGain; - - CDSPPeakedCosineLPF w( Len2, Freq, FltAlpha ); - - fs.IsUpsample = IsUpsample; - fs.ResampleFactor = ResampleFactor; - fs.FltLatency = w.fl2; - - int FltExt; // Filter's extension due to fpclass :: elalign. - - if( IsModel ) - { - allocFilter( fs.Flt, w.FilterLen, true, &FltExt ); - - if( UseFltOrig ) - { - // Allocate a real buffer even in modeling mode since this - // filter may be copied by the filter bank. 
- - fs.FltOrig.alloc( w.FilterLen ); - memset( &fs.FltOrig[ 0 ], 0, - w.FilterLen * sizeof( fs.FltOrig[ 0 ])); - } - } - else - { - fs.FltOrig.alloc( w.FilterLen ); - - w.generateLPF( &fs.FltOrig[ 0 ], 1.0 ); - optimizeFIRFilter( fs.FltOrig, fs.FltLatency ); - normalizeFIRFilter( &fs.FltOrig[ 0 ], fs.FltOrig.getCapacity(), - fs.DCGain ); - - allocFilter( fs.Flt, fs.FltOrig.getCapacity(), false, &FltExt ); - copyArray( &fs.FltOrig[ 0 ], &fs.Flt[ 0 ], - fs.FltOrig.getCapacity() ); - - if( !UseFltOrig ) - { - fs.FltOrig.free(); - } - } - - if( IsUpsample ) - { - int l = fs.Flt.getCapacity() - fs.FltLatency - ResampleFactor - - FltExt; - - allocFilter( fs.PrefixDC, l, IsModel ); - allocFilter( fs.SuffixDC, fs.FltLatency, IsModel ); - - if( IsModel ) - { - return; - } - - // Create prefix and suffix "tails" used during upsampling. - - const fptype* ip = &fs.Flt[ fs.FltLatency + ResampleFactor ]; - copyArray( ip, &fs.PrefixDC[ 0 ], l ); - - while( true ) - { - ip += ResampleFactor; - l -= ResampleFactor; - - if( l <= 0 ) - { - break; - } - - addArray( ip, &fs.PrefixDC[ 0 ], l ); - } - - l = fs.FltLatency; - fptype* op = &fs.SuffixDC[ 0 ]; - copyArray( &fs.Flt[ 0 ], op, l ); - - while( true ) - { - op += ResampleFactor; - l -= ResampleFactor; - - if( l <= 0 ) - { - break; - } - - addArray( &fs.Flt[ 0 ], op, l ); - } - } - else - if( !UseFltOrig ) - { - fs.EdgePixelCount = fs.EdgePixelCountDef; - } - } - - /** - * Function adds a correction filter that tries to achieve a linear - * frequency response at all frequencies. The actual resulting response - * may feature a slight damping of the highest frequencies since a - * suitably short correction filter cannot fix steep high-frequency - * damping. - * - * This function assumes that the resizing step is currently the last - * step, even if it was not inserted yet: this allows placement of the - * correction filter both before and after the resizing step. - * - * @param Steps Filtering steps. 
- * @param bw Resulting bandwidth relative to the original bandwidth (which - * is 1.0), usually 1/k. Should be <= 1.0. - * @param IsPreCorrection "True" if the filtering step was already created - * and it is first in the Steps array. "True" also adds edge pixels to - * reduce edge artifacts. - * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - void addCorrectionFilter( CFilterSteps& Steps, const double bw, - const bool IsPreCorrection, const bool IsModel ) const - { - CFilterStep& fs = ( IsPreCorrection ? Steps[ 0 ] : Steps.add() ); - fs.IsUpsample = false; - fs.ResampleFactor = 1; - fs.DCGain = 1.0; - fs.EdgePixelCount = ( IsPreCorrection ? fs.EdgePixelCountDef : 0 ); - - if( IsModel ) - { - allocFilter( fs.Flt, CDSPFIREQ :: calcFilterLength( - Params.CorrFltLen, fs.FltLatency ), true ); - - return; - } - - const int BinCount = 65; // Frequency response bins to control. - const int BinCount1 = BinCount - 1; - double curbw = 1.0; // Bandwidth of the filter at the current step. - int i; - int j; - double re; - double im; - - CBuffer< double > Bins( BinCount ); // Adjustment introduced by all - // steps at all frequencies of interest. - - for( j = 0; j < BinCount; j++ ) - { - Bins[ j ] = 1.0; - } - - const int si = ( IsPreCorrection ? 1 : 0 ); - - for( i = si; i < Steps.getItemCount() - ( si ^ 1 ); i++ ) - { - const CFilterStep& fs = Steps[ i ]; - - if( fs.IsUpsample ) - { - curbw *= fs.ResampleFactor; - - if( fs.FltOrig.getCapacity() > 0 ) - { - continue; - } - } - - const double dcg = 1.0 / fs.DCGain; // DC gain correction. - const fptype* Flt; - int FltLen; - - if( fs.ResampleFactor == 0 ) - { - Flt = fs.FltBank -> getFilter( 0 ); - FltLen = fs.FltBank -> getFilterLen(); - } - else - { - Flt = &fs.Flt[ 0 ]; - FltLen = fs.Flt.getCapacity(); - } - - // Calculate frequency response adjustment introduced by the - // filter at this step, within the bounds of bandwidth of - // interest. 
- - for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * bw / curbw * j / BinCount1; - - calcFIRFilterResponse( Flt, FltLen, th, re, im ); - - Bins[ j ] /= sqrt( re * re + im * im ) * dcg; - } - - if( !fs.IsUpsample && fs.ResampleFactor > 1 ) - { - curbw /= fs.ResampleFactor; - } - } - - // Calculate filter. - - CDSPFIREQ EQ; - EQ.init( bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false, - Params.CorrFltAlpha ); - - fs.FltLatency = EQ.getFilterLatency(); - - CBuffer< double > Filter( EQ.getFilterLength() ); - EQ.buildFilter( Bins, &Filter[ 0 ]); - normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 ); - optimizeFIRFilter( Filter, fs.FltLatency ); - normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 ); - - allocFilter( fs.Flt, Filter.getCapacity() ); - copyArray( &Filter[ 0 ], &fs.Flt[ 0 ], Filter.getCapacity() ); - - // Print a theoretically achieved final frequency response at various - // feature sizes (from DC to 1 pixel). Values above 255 means features - // become brighter, values below 255 means features become dimmer. - -/* const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 ); - - for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * sbw * j / BinCount1; - - calcFIRFilterResponse( &fs.Flt[ 0 ], fs.Flt.getCapacity(), - th, re, im ); - - printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j ] * 255 ); - } - - printf( "***\n" );*/ - } - - /** - * Function adds a sharpening filter if image is being upsized. Such - * sharpening allows to spot interpolation filter's stop-band attenuation: - * if attenuation is too weak, a "dark grid" and other artifacts may - * become visible. - * - * It is assumed that 40 decibel stop-band attenuation should be - * considered a required minimum: this allows application of (deliberately - * strong) 64X sharpening without spotting any artifacts. - * - * @param Steps Filtering steps. - * @param bw Resulting bandwidth relative to the original bandwidth (which - * is 1.0), usually 1/k. 
- * @param IsModel "True" if filtering steps modeling is performed without - * actual filter building. - */ - - static void addSharpenTest( CFilterSteps& Steps, const double bw, - const bool IsModel ) - { - if( bw <= 1.0 ) - { - return; - } - - const double FltLen = 10.0 * bw; - - CFilterStep& fs = Steps.add(); - fs.IsUpsample = false; - fs.ResampleFactor = 1; - fs.DCGain = 1.0; - fs.EdgePixelCount = 0; - - if( IsModel ) - { - allocFilter( fs.Flt, CDSPFIREQ :: calcFilterLength( FltLen, - fs.FltLatency ), true ); - - return; - } - - const int BinCount = 200; - CBuffer< double > Bins( BinCount ); - int Thresh = (int) round( BinCount / bw * 1.75 ); - - if( Thresh > BinCount ) - { - Thresh = BinCount; - } - - int j; - - for( j = 0; j < Thresh; j++ ) - { - Bins[ j ] = 1.0; - } - - for( j = Thresh; j < BinCount; j++ ) - { - Bins[ j ] = 256.0; - } - - CDSPFIREQ EQ; - EQ.init( bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7 ); - - fs.FltLatency = EQ.getFilterLatency(); - - CBuffer< double > Filter( EQ.getFilterLength() ); - EQ.buildFilter( Bins, &Filter[ 0 ]); - normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 ); - optimizeFIRFilter( Filter, fs.FltLatency ); - normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 ); - - allocFilter( fs.Flt, Filter.getCapacity() ); - copyArray( &Filter[ 0 ], &fs.Flt[ 0 ], Filter.getCapacity() ); - -/* for( j = 0; j < BinCount; j++ ) - { - const double th = AVIR_PI * j / ( BinCount - 1 ); - double re; - double im; - - calcFIRFilterResponse( &fs.Flt[ 0 ], fs.Flt.getCapacity(), - th, re, im ); - - printf( "%f\n", sqrt( re * re + im * im )); - } - - printf( "***\n" );*/ - } - - /** - * Function builds sequence of filtering steps depending on the specified - * resizing coefficient. The last steps included are always the resizing - * step then (possibly) the correction step. - * - * @param Steps Array that receives filtering steps. - * @param[out] Vars Variables object. - * @param FltBank Filter bank to initialize and use. 
- * @param DCGain The overall DC gain to apply. This DC gain is applied to - * the first filtering step only (upsampling or filtering step). - * @param ModeFlags Build mode flags to use. This is a bitmap of switches - * that enable or disable certain algorithm features. - * @param IsModel "True" if filtering steps modeling is performed without - * the actual filter allocation and building. - */ - - void buildFilterSteps( CFilterSteps& Steps, CImageResizerVars& Vars, - CDSPFracFilterBankLin< fptype >& FltBank, const double DCGain, - const int ModeFlags, const bool IsModel ) const - { - Steps.clear(); - - const bool DoFltAndIntCombo = (( ModeFlags & 1 ) != 0 ); // Do filter - // and interpolator combining. - const bool ForceHiOrderInt = (( ModeFlags & 2 ) != 0 ); // Force use - // of a higher-order interpolation. - const bool UseHalfband = (( ModeFlags & 4 ) != 0 ); // Use half-band - // filter. - - const double bw = 1.0 / Vars.k; // Resulting bandwidth. - const int UpsampleFactor = ( (int) floor( Vars.k ) < 2 ? 2 : 1 ); - double IntCutoffMult; // Interpolation filter cutoff multiplier. - CFilterStep* ReuseStep; // If not NULL, resizing step should use - // this step object instead of creating a new one. - CFilterStep* ExtFltStep; // Use FltOrig of this step as the external - // filter to applied to the interpolator. - bool IsPreCorrection; // "True" if the correction filter is applied - // first. - double FltCutoff; // Cutoff frequency of the first filtering step. - double corrbw; ///< Bandwidth at the correction step. - - if( Vars.k <= 1.0 ) - { - IsPreCorrection = true; - FltCutoff = 1.0; - corrbw = 1.0; - Steps.add(); - } - else - { - IsPreCorrection = false; - FltCutoff = bw; - corrbw = bw; - } - - // Add 1 upsampling or several downsampling filters. 
- - if( UpsampleFactor > 1 ) - { - CFilterStep& fs = Steps.add(); - assignFilterParams( fs, true, UpsampleFactor, FltCutoff, DCGain, - DoFltAndIntCombo, IsModel ); - - IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor; - ReuseStep = NULL; - ExtFltStep = ( DoFltAndIntCombo ? &fs : NULL ); - } - else - { - int DownsampleFactor; - - while( true ) - { - DownsampleFactor = (int) floor( 0.5 / FltCutoff ); - bool DoHBFltAdd; - - if( DownsampleFactor > 16 ) - { - // Add half-band filter unconditionally in order to keep - // filter lengths lower for more precise frequency - // response and less edge artifacts. - - DoHBFltAdd = true; - DownsampleFactor = 16; - } - else - { - DoHBFltAdd = ( UseHalfband && DownsampleFactor > 1 ); - } - - if( DoHBFltAdd ) - { - assignFilterParams( Steps.add(), false, DownsampleFactor, - 0.0, 1.0, false, IsModel ); - - FltCutoff *= DownsampleFactor; - } - else - { - if( DownsampleFactor < 1 ) - { - DownsampleFactor = 1; - } - - break; - } - } - - CFilterStep& fs = Steps.add(); - assignFilterParams( fs, false, DownsampleFactor, FltCutoff, - DCGain, DoFltAndIntCombo, IsModel ); - - IntCutoffMult = FltCutoff / 0.5; - - if( DoFltAndIntCombo ) - { - ReuseStep = &fs; - ExtFltStep = &fs; - } - else - { - IntCutoffMult *= DownsampleFactor; - ReuseStep = NULL; - ExtFltStep = NULL; - } - } - - // Insert resizing and correction steps. - - CFilterStep& fs = ( ReuseStep == NULL ? Steps.add() : *ReuseStep ); - - Vars.ResizeStep = Steps.getItemCount() - 1; - fs.IsUpsample = false; - fs.ResampleFactor = 0; - fs.DCGain = ( ExtFltStep == NULL ? 1.0 : ExtFltStep -> DCGain ); - - initFilterBank( FltBank, IntCutoffMult, ForceHiOrderInt, - ( ExtFltStep == NULL ? 
fs.FltOrig : ExtFltStep -> FltOrig )); - - if( FltBank == FixedFilterBank ) - { - fs.FltBank = (CDSPFracFilterBankLin< fptype >*) &FixedFilterBank; - } - else - { - fs.FltBank = &FltBank; - } - - addCorrectionFilter( Steps, corrbw, IsPreCorrection, IsModel ); - - //addSharpenTest( Steps, bw, IsModel ); - } - - /** - * Function extends *this upsampling step so that it produces more - * upsampled pixels that cover the prefix and suffix needs of the next - * step. After the call to this function the InPrefix and InSuffix - * variables of the next step will be set to zero. - * - * @param fs Upsampling filtering step. - * @param NextStep The next step structure. - */ - - static void extendUpsample( CFilterStep& fs, CFilterStep& NextStep ) - { - fs.InPrefix = ( NextStep.InPrefix + fs.ResampleFactor - 1 ) / - fs.ResampleFactor; - - fs.OutPrefix += fs.InPrefix * fs.ResampleFactor; - NextStep.InPrefix = 0; - - fs.InSuffix = ( NextStep.InSuffix + fs.ResampleFactor - 1 ) / - fs.ResampleFactor; - - fs.OutSuffix += fs.InSuffix * fs.ResampleFactor; - NextStep.InSuffix = 0; - } - - /** - * Function fills resizing step's RPosBuf array, excluding the actual - * "ftp" pointers and "SrcOffs" offsets. - * - * This array should be cleared if the resizing step or offset were - * changed. Otherwise this function only fills the elements required to - * cover resizing step's OutLen. - * - * This function is called by the updateFilterStepBuffers() function. - * - * @param fs Resizing step. - * @param Vars Variables object. 
- */ - - static void fillRPosBuf( CFilterStep& fs, const CImageResizerVars& Vars ) - { - const int PrevLen = fs.RPosBuf -> getCapacity(); - - if( fs.OutLen > PrevLen ) - { - fs.RPosBuf -> increaseCapacity( fs.OutLen ); - } - - typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ PrevLen ]; - const int FracCount = fs.FltBank -> getFracCount(); - const double o = Vars.o; - const double k = Vars.k; - int i; - - for( i = PrevLen; i < fs.OutLen; i++ ) - { - const double SrcPos = o + k * i; - const int SrcPosInt = (int) floor( SrcPos ); - const double x = ( SrcPos - SrcPosInt ) * FracCount; - const int fti = (int) x; - rpos -> x = (typename fpclass :: fptypeatom) ( x - fti ); - rpos -> fti = fti; - rpos -> SrcPosInt = SrcPosInt; - rpos++; - } - } - - /** - * Function updates filtering step buffer lengths depending on the - * specified source and new scanline lengths. This function should be - * called after the buildFilterSteps() function. - * - * @param Steps Array that receives filtering steps. - * @param[out] Vars Variables object, will receive buffer size and length. - * This function expects "k" and "o" variable values that will be - * adjusted by this function. - * @param RPosBufArray Resizing position buffers array, used to obtain - * buffer to initialize and use (will be reused if it is already fully or - * partially filled). - * @param SrcLen Source scanline's length in pixels. - * @param NewLen New scanline's length in pixels. 
- */ - - static void updateFilterStepBuffers( CFilterSteps& Steps, - CImageResizerVars& Vars, - typename CFilterStep :: CRPosBufArray& RPosBufArray, int SrcLen, - const int NewLen ) - { - int upstep = -1; - int InBuf = 0; - int i; - - for( i = 0; i < Steps.getItemCount(); i++ ) - { - CFilterStep& fs = Steps[ i ]; - - fs.Vars = &Vars; - fs.InLen = SrcLen; - fs.InBuf = InBuf; - fs.OutBuf = ( InBuf + 1 ) & 1; - - if( fs.IsUpsample ) - { - upstep = i; - Vars.k *= fs.ResampleFactor; - Vars.o *= fs.ResampleFactor; - fs.InPrefix = 0; - fs.InSuffix = 0; - fs.OutLen = fs.InLen * fs.ResampleFactor; - fs.OutPrefix = fs.FltLatency; - fs.OutSuffix = fs.Flt.getCapacity() - fs.FltLatency - - fs.ResampleFactor; - - int l0 = fs.OutPrefix + fs.OutLen + fs.OutSuffix; - int l = fs.InLen * fs.ResampleFactor + - fs.SuffixDC.getCapacity(); - - if( l > l0 ) - { - fs.OutSuffix += l - l0; - } - - l0 = fs.OutLen + fs.OutSuffix; - - if( fs.PrefixDC.getCapacity() > l0 ) - { - fs.OutSuffix += fs.PrefixDC.getCapacity() - l0; - } - } - else - if( fs.ResampleFactor == 0 ) - { - const int FilterLenD2 = fs.FltBank -> getFilterLen() / 2; - const int FilterLenD21 = FilterLenD2 - 1; - - const int ResizeLPix = (int) floor( Vars.o ) - FilterLenD21; - fs.InPrefix = ( ResizeLPix < 0 ? -ResizeLPix : 0 ); - const int ResizeRPix = (int) floor( Vars.o + - ( NewLen - 1 ) * Vars.k ) + FilterLenD2 + 1; - - fs.InSuffix = ( ResizeRPix > fs.InLen ? - ResizeRPix - fs.InLen : 0 ); - - fs.OutLen = NewLen; - fs.RPosBuf = &RPosBufArray.getRPosBuf( Vars.k, Vars.o, - fs.FltBank -> getFracCount() ); - - fillRPosBuf( fs, Vars ); - } - else - { - Vars.k /= fs.ResampleFactor; - Vars.o /= fs.ResampleFactor; - Vars.o += fs.EdgePixelCount; - - fs.InPrefix = fs.FltLatency; - fs.InSuffix = fs.Flt.getCapacity() - fs.FltLatency - 1; - - // Additionally extend OutLen to produce more precise edge - // pixels. 
- - fs.OutLen = ( fs.InLen + fs.ResampleFactor - 1 ) / - fs.ResampleFactor + fs.EdgePixelCount; - - fs.InSuffix += ( fs.OutLen - 1 ) * fs.ResampleFactor + 1 - - fs.InLen; - - fs.InPrefix += fs.EdgePixelCount * fs.ResampleFactor; - fs.OutLen += fs.EdgePixelCount; - } - - InBuf = fs.OutBuf; - SrcLen = fs.OutLen; - } - - Steps[ Steps.getItemCount() - 1 ].OutBuf = 2; - - if( upstep != -1 ) - { - extendUpsample( Steps[ upstep ], Steps[ upstep + 1 ]); - } - } - - /** - * Function calculates an optimal intermediate buffer length that will - * cover all needs of the specified filtering steps. This function should - * be called after the updateFilterStepBuffers() function. - * - * Function also updates resizing step's RPosBuf pointers to the filter - * bank and SrcOffs values. - * - * @param Steps Filtering steps. - * @param[out] Vars Variables object, will receive buffer size and length. - * @param ResElIncr Resulting (final) element increment, used to produce - * de-interleaved result. For horizontal processing this value is equal - * to last step's OutLen, for vertical processing this value is equal to - * resulting image's width. 
- */ - - static void updateBufLenAndRPosPtrs( CFilterSteps& Steps, - CImageResizerVars& Vars, const int ResElIncr ) - { - int MaxPrefix[ 2 ] = { 0, 0 }; - int MaxLen[ 2 ] = { 0, 0 }; - int i; - - for( i = 0; i < Steps.getItemCount(); i++ ) - { - CFilterStep& fs = Steps[ i ]; - const int ib = fs.InBuf; - - if( fs.InPrefix > MaxPrefix[ ib ]) - { - MaxPrefix[ ib ] = fs.InPrefix; - } - - int l = fs.InLen + fs.InSuffix; - - if( l > MaxLen[ ib ]) - { - MaxLen[ ib ] = l; - } - - fs.InElIncr = fs.InPrefix + l; - - if( fs.OutBuf == 2 ) - { - break; - } - - const int ob = fs.OutBuf; - - if( fs.IsUpsample ) - { - if( fs.OutPrefix > MaxPrefix[ ob ]) - { - MaxPrefix[ ob ] = fs.OutPrefix; - } - - l = fs.OutLen + fs.OutSuffix; - - if( l > MaxLen[ ob ]) - { - MaxLen[ ob ] = l; - } - } - else - { - if( fs.OutLen > MaxLen[ ob ]) - { - MaxLen[ ob ] = fs.OutLen; - } - } - } - - // Update OutElIncr values of all steps. - - for( i = 0; i < Steps.getItemCount(); i++ ) - { - CFilterStep& fs = Steps[ i ]; - - if( fs.OutBuf == 2 ) - { - fs.OutElIncr = ResElIncr; - break; - } - - CFilterStep& fs2 = Steps[ i + 1 ]; - - if( fs.IsUpsample ) - { - fs.OutElIncr = fs.OutPrefix + fs.OutLen + fs.OutSuffix; - - if( fs.OutElIncr > fs2.InElIncr ) - { - fs2.InElIncr = fs.OutElIncr; - } - else - { - fs.OutElIncr = fs2.InElIncr; - } - } - else - { - fs.OutElIncr = fs2.InElIncr; - } - } - - // Update temporary buffer's length. - - for( i = 0; i < 2; i++ ) - { - Vars.BufLen[ i ] = MaxPrefix[ i ] + MaxLen[ i ]; - Vars.BufOffs[ i ] = MaxPrefix[ i ]; - - if( Vars.packmode == 0 ) - { - Vars.BufOffs[ i ] *= Vars.ElCount; - } - - Vars.BufLen[ i ] *= Vars.ElCount; - } - - // Update RPosBuf pointers and SrcOffs. - - CFilterStep& fs = Steps[ Vars.ResizeStep ]; - typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ 0 ]; - const int em = ( fpclass :: packmode == 0 ? 
Vars.ElCount : 1 ); - const int FilterLenD21 = fs.FltBank -> getFilterLen() / 2 - 1; - - for( i = 0; i < fs.OutLen; i++ ) - { - rpos -> ftp = fs.FltBank -> getFilter( rpos -> fti ); - rpos -> SrcOffs = ( rpos -> SrcPosInt - FilterLenD21 ) * em; - rpos++; - } - } - - /** - * Function modifies the overall (DC) gain of the correction filter in the - * pre-built filtering steps array. - * - * @param Steps Filtering steps. - * @param m Multiplier to apply to the correction filter. - */ - - void modifyCorrFilterDCGain( CFilterSteps& Steps, const double m ) const - { - CBuffer< fptype >* Flt; - const int z = Steps.getItemCount() - 1; - - if( !Steps[ z ].IsUpsample && Steps[ z ].ResampleFactor == 1 ) - { - Flt = &Steps[ z ].Flt; - } - else - { - Flt = &Steps[ 0 ].Flt; - } - - int i; - - for( i = 0; i < Flt -> getCapacity(); i++ ) - { - (*Flt)[ i ] = (fptype) ( (double) (*Flt)[ i ] * m ); - } - } - - /** - * Function builds a map of used fractional delay filters based on the - * resizing positions buffer. - * - * @param fs Resizing step. - * @param[out] UsedFracMap Map of used fractional delay filters. - */ - - static void fillUsedFracMap( const CFilterStep& fs, - CBuffer< uint8_t >& UsedFracMap ) - { - const int FracCount = fs.FltBank -> getFracCount(); - UsedFracMap.increaseCapacity( FracCount, false ); - memset( &UsedFracMap[ 0 ], 0, FracCount * sizeof( UsedFracMap[ 0 ])); - - typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ 0 ]; - int i; - - for( i = 0; i < fs.OutLen; i++ ) - { - UsedFracMap[ rpos -> fti ] |= 1; - rpos++; - } - } - - /** - * Function calculates the overall filtering steps complexity per - * scanline. Each complexity unit corresponds to a single multiply-add - * operation. Data copy and pointer math operations are not included in - * this calculation, it is assumed that they correlate to the multiply-add - * operations. 
Calculation also does not include final rounding, dithering - * and clamping operations since they cannot be optimized out anyway. - * - * Calculation of the CRPosBuf buffer is not included since it cannot be - * avoided. - * - * This function should be called after the updateFilterStepBuffers() - * function. - * - * @param Steps Filtering steps array. - * @param Vars Variables object. - * @param UsedFracMap The map of used fractional delay filters. - * @param ScanlineCount Scanline count. - */ - - static int calcComplexity( const CFilterSteps& Steps, - const CImageResizerVars& Vars, const CBuffer< uint8_t >& UsedFracMap, - const int ScanlineCount ) - { - int fcnum; // Filter complexity multiplier numerator. - int fcdenom; // Filter complexity multiplier denominator. - - if( Vars.packmode != 0 ) - { - fcnum = 1; - fcdenom = 1; - } - else - { - // In interleaved processing mode, filters require 1 less - // multiplication per 2 multiply-add instructions. - - fcnum = 3; - fcdenom = 4; - } - - int s = 0; // Complexity per one scanline. - int s2 = 0; // Complexity per all scanlines. - int i; - - for( i = 0; i < Steps.getItemCount(); i++ ) - { - const CFilterStep& fs = Steps[ i ]; - - s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity. - - if( fs.IsUpsample ) - { - if( fs.FltOrig.getCapacity() > 0 ) - { - continue; - } - - s += ( fs.Flt.getCapacity() * - ( fs.InPrefix + fs.InLen + fs.InSuffix ) + - fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity() ) * - Vars.ElCount; - } - else - if( fs.ResampleFactor == 0 ) - { - s += fs.FltBank -> getFilterLen() * - ( fs.FltBank -> getOrder() + Vars.ElCount ) * fs.OutLen; - - s2 += fs.FltBank -> calcInitComplexity( UsedFracMap ); - } - else - { - s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen * - fcnum / fcdenom; - } - } - - return( s + s2 / ScanlineCount ); - } - - /** - * @brief Thread-isolated data used for scanline processing. 
- * - * This structure holds data necessary for image's horizontal or vertical - * scanline processing, including scanline processing queue. - * - * @tparam Tin Source element data type. Intermediate buffers store data - * in floating point format. - * @tparam Tout Destination element data type. Intermediate buffers store - * data in floating point format. - */ - - template< class Tin, class Tout > - class CThreadData : public CImageResizerThreadPool :: CWorkload - { - public: - virtual void process() - { - processScanlineQueue(); - } - - /** - * This enumeration lists possible scanline operations. - */ - - enum EScanlineOperation - { - sopResizeH, ///< Resize horizontal scanline. - ///< - sopResizeV, ///< Resize vertical scanline. - ///< - sopDitherAndUnpackH, ///< Dither and unpack horizontal scanline. - ///< - sopUnpackH ///< Unpack horizontal scanline. - ///< - }; - - /** - * Function initializes *this thread data object and assigns certain - * variables provided by the higher level code. - * - * @param aThreadIndex Index of this thread data (0-based). - * @param aThreadCount Total number of threads used during processing. - * @param aSteps Filtering steps. - * @param aVars Image resizer variables. - */ - - void init( const int aThreadIndex, const int aThreadCount, - const CFilterSteps& aSteps, const CImageResizerVars& aVars ) - { - ThreadIndex = aThreadIndex; - ThreadCount = aThreadCount; - Steps = &aSteps; - Vars = &aVars; - } - - /** - * Function initializes scanline processing queue, and updates - * capacities of intermediate buffers. - * - * @param aOp Operation to perform over scanline. - * @param TotalLines The total number of scanlines that will be - * processed by all threads. - * @param aSrcLen Source scanline length in pixels. - * @param aSrcIncr Source scanline buffer increment. Ignored in - * horizontal scanline processing. - * @param aResIncr Resulting scanline buffer increment. Ignored in - * horizontal scanline processing. 
- */ - - void initScanlineQueue( const EScanlineOperation aOp, - const int TotalLines, const int aSrcLen, const int aSrcIncr = 0, - const int aResIncr = 0 ) - { - const int l = Vars -> BufLen[ 0 ] + Vars -> BufLen[ 1 ]; - - if( Bufs.getCapacity() < l ) - { - Bufs.alloc( l, fpclass :: fpalign ); - } - - BufPtrs[ 0 ] = Bufs + Vars -> BufOffs[ 0 ]; - BufPtrs[ 1 ] = Bufs + Vars -> BufLen[ 0 ] + Vars -> BufOffs[ 1 ]; - - int j; - int ml = 0; - - for( j = 0; j < Steps -> getItemCount(); j++ ) - { - const CFilterStep& fs = (*Steps)[ j ]; - - if( fs.ResampleFactor == 0 && - ml < fs.FltBank -> getFilterLen() ) - { - ml = fs.FltBank -> getFilterLen(); - } - } - - TmpFltBuf.alloc( ml, fpclass :: fpalign ); - ScanlineOp = aOp; - SrcLen = aSrcLen; - SrcIncr = aSrcIncr; - ResIncr = aResIncr; - QueueLen = 0; - Queue.increaseCapacity(( TotalLines + ThreadCount - 1 ) / - ThreadCount, false ); - } - - /** - * Function adds a scanline to the queue buffer. The - * initScanlineQueue() function should be called before calling this - * function. The number of calls to this add function should not - * exceed the TotalLines spread over all threads. - * - * @param SrcBuf Source scanline buffer. - * @param ResBuf Resulting scanline buffer. - */ - - void addScanlineToQueue( void* const SrcBuf, void* const ResBuf ) - { - Queue[ QueueLen ].SrcBuf = SrcBuf; - Queue[ QueueLen ].ResBuf = ResBuf; - QueueLen++; - } - - /** - * Function processes all queued scanlines. 
- */ - - void processScanlineQueue() - { - int i; - - switch( ScanlineOp ) - { - case sopResizeH: - { - for( i = 0; i < QueueLen; i++ ) - { - resizeScanlineH( (Tin*) Queue[ i ].SrcBuf, - (fptype*) Queue[ i ].ResBuf ); - } - - break; - } - - case sopResizeV: - { - for( i = 0; i < QueueLen; i++ ) - { - resizeScanlineV( (fptype*) Queue[ i ].SrcBuf, - (fptype*) Queue[ i ].ResBuf ); - } - - break; - } - - case sopDitherAndUnpackH: - { - if( Vars -> UseSRGBGamma ) - { - for( i = 0; i < QueueLen; i++ ) - { - CFilterStep :: applySRGBGamma( - (fptype*) Queue[ i ].SrcBuf, SrcLen, *Vars ); - - Ditherer.dither( (fptype*) Queue[ i ].SrcBuf ); - - CFilterStep :: unpackScanline( - (fptype*) Queue[ i ].SrcBuf, - (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars ); - } - } - else - { - for( i = 0; i < QueueLen; i++ ) - { - Ditherer.dither( (fptype*) Queue[ i ].SrcBuf ); - - CFilterStep :: unpackScanline( - (fptype*) Queue[ i ].SrcBuf, - (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars ); - } - } - - break; - } - - case sopUnpackH: - { - if( Vars -> UseSRGBGamma ) - { - for( i = 0; i < QueueLen; i++ ) - { - CFilterStep :: applySRGBGamma( - (fptype*) Queue[ i ].SrcBuf, SrcLen, *Vars ); - - CFilterStep :: unpackScanline( - (fptype*) Queue[ i ].SrcBuf, - (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars ); - } - } - else - { - for( i = 0; i < QueueLen; i++ ) - { - CFilterStep :: unpackScanline( - (fptype*) Queue[ i ].SrcBuf, - (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars ); - } - } - - break; - } - } - } - - /** - * Function returns ditherer object associated with *this thread data - * object. - */ - - CDitherer& getDitherer() - { - return( Ditherer ); - } - - private: - int ThreadIndex; ///< Thread index. - ///< - int ThreadCount; ///< Thread count. - ///< - const CFilterSteps* Steps; ///< Filtering steps. - ///< - const CImageResizerVars* Vars; ///< Image resizer variables. - ///< - CBuffer< fptype > Bufs; ///< Flip-flop intermediate buffers. 
- ///< - fptype* BufPtrs[ 3 ]; ///< Flip-flop buffer pointers (referenced by - ///< filtering step's InBuf and OutBuf indices). - ///< - CBuffer< fptype > TmpFltBuf; ///< Temporary buffer used in the - ///< doResize() function, aligned by fpclass :: fpalign. - ///< - EScanlineOperation ScanlineOp; ///< Operation to perform over - ///< scanline. - ///< - int SrcLen; ///< Source scanline length in the last queue. - ///< - int SrcIncr; ///< Source scanline buffer increment in the last queue. - ///< - int ResIncr; ///< Resulting scanline buffer increment in the last - ///< queue. - ///< - CDitherer Ditherer; ///< Ditherer object to use. - ///< - - /** - * @brief Scanline processing queue item. - * - * Scanline processing queue item. - */ - - struct CQueueItem - { - void* SrcBuf; ///< Source scanline buffer, will by typecasted to - ///< Tin or fptype*. - ///< - void* ResBuf; ///< Resulting scanline buffer, will by typecasted - ///< to Tout or fptype*. - ///< - }; - - CBuffer< CQueueItem > Queue; ///< Scanline processing queue. - ///< - int QueueLen; ///< Queue length. - ///< - - /** - * Function resizes a single horizontal scanline. - * - * @param SrcBuf Source scanline buffer. Can be either horizontal or - * vertical. - * @param ResBuf Resulting scanline buffer. - */ - - void resizeScanlineH( const Tin* const SrcBuf, fptype* const ResBuf ) - { - (*Steps)[ 0 ].packScanline( SrcBuf, BufPtrs[ 0 ], SrcLen ); - BufPtrs[ 2 ] = ResBuf; - int j; - - for( j = 0; j < Steps -> getItemCount(); j++ ) - { - const CFilterStep& fs = (*Steps)[ j ]; - fs.prepareInBuf( BufPtrs[ fs.InBuf ]); - const int DstIncr = - ( Vars -> packmode == 0 ? 
Vars -> ElCount : 1 ); - - if( fs.ResampleFactor != 0 ) - { - if( fs.IsUpsample ) - { - fs.doUpsample( BufPtrs[ fs.InBuf ], - BufPtrs[ fs.OutBuf ]); - } - else - { - fs.doFilter( BufPtrs[ fs.InBuf ], - BufPtrs[ fs.OutBuf ], DstIncr ); - } - } - else - { - fs.doResize( BufPtrs[ fs.InBuf ], BufPtrs[ fs.OutBuf ], - DstIncr, TmpFltBuf ); - } - } - } - - /** - * Function resizes a single vertical scanline. - * - * @param SrcBuf Source scanline buffer. Can be either horizontal or - * vertical. - * @param ResBuf Resulting scanline buffer. - */ - - void resizeScanlineV( const fptype* const SrcBuf, - fptype* const ResBuf ) - { - (*Steps)[ 0 ].convertVtoH( SrcBuf, BufPtrs[ 0 ], SrcLen, - SrcIncr ); - - BufPtrs[ 2 ] = ResBuf; - int j; - - for( j = 0; j < Steps -> getItemCount(); j++ ) - { - const CFilterStep& fs = (*Steps)[ j ]; - fs.prepareInBuf( BufPtrs[ fs.InBuf ]); - const int DstIncr = ( fs.OutBuf == 2 ? ResIncr : - ( Vars -> packmode == 0 ? Vars -> ElCount : 1 )); - - if( fs.ResampleFactor != 0 ) - { - if( fs.IsUpsample ) - { - fs.doUpsample( BufPtrs[ fs.InBuf ], - BufPtrs[ fs.OutBuf ]); - } - else - { - fs.doFilter( BufPtrs[ fs.InBuf ], - BufPtrs[ fs.OutBuf ], DstIncr ); - } - } - else - { - fs.doResize( BufPtrs[ fs.InBuf ], BufPtrs[ fs.OutBuf ], - DstIncr, TmpFltBuf ); - } - } - } - }; -}; - -#undef AVIR_PI -#undef AVIR_PId2 - -} // namespace avir - -#endif // AVIR_CIMAGERESIZER_INCLUDED diff --git a/third_party/avir/avir.mk b/third_party/avir/avir.mk deleted file mode 100644 index dff90ea8..00000000 --- a/third_party/avir/avir.mk +++ /dev/null @@ -1,71 +0,0 @@ -#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ -#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ - -PKGS += THIRD_PARTY_AVIR - -THIRD_PARTY_AVIR_ARTIFACTS += THIRD_PARTY_AVIR_A -THIRD_PARTY_AVIR = $(THIRD_PARTY_AVIR_A_DEPS) $(THIRD_PARTY_AVIR_A) -THIRD_PARTY_AVIR_A = o/$(MODE)/third_party/avir/avir.a -THIRD_PARTY_AVIR_A_CHECKS = $(THIRD_PARTY_AVIR_A).pkg 
-THIRD_PARTY_AVIR_A_FILES := $(wildcard third_party/avir/*) -THIRD_PARTY_AVIR_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_AVIR_A_FILES)) -THIRD_PARTY_AVIR_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_AVIR_A_FILES)) -THIRD_PARTY_AVIR_A_SRCS_X = $(filter %.cc,$(THIRD_PARTY_AVIR_A_FILES)) - -THIRD_PARTY_AVIR_A_HDRS = \ - $(filter %.h,$(THIRD_PARTY_AVIR_A_FILES)) \ - $(filter %.hpp,$(THIRD_PARTY_AVIR_A_FILES)) - -THIRD_PARTY_AVIR_A_SRCS = \ - $(THIRD_PARTY_AVIR_A_SRCS_S) \ - $(THIRD_PARTY_AVIR_A_SRCS_C) \ - $(THIRD_PARTY_AVIR_A_SRCS_X) - -THIRD_PARTY_AVIR_A_OBJS = \ - $(THIRD_PARTY_AVIR_A_SRCS:%=o/$(MODE)/%.zip.o) \ - $(THIRD_PARTY_AVIR_A_SRCS_S:%.S=o/$(MODE)/%.o) \ - $(THIRD_PARTY_AVIR_A_SRCS_C:%.c=o/$(MODE)/%.o) \ - $(THIRD_PARTY_AVIR_A_SRCS_X:%.cc=o/$(MODE)/%.o) - -THIRD_PARTY_AVIR_A_DIRECTDEPS = \ - DSP_CORE \ - LIBC_NEXGEN32E \ - LIBC_BITS \ - LIBC_MEM \ - LIBC_CALLS \ - LIBC_STUBS \ - LIBC_SYSV \ - LIBC_FMT \ - LIBC_UNICODE \ - LIBC_LOG \ - LIBC_TINYMATH - -$(THIRD_PARTY_AVIR_A).pkg: \ - $(THIRD_PARTY_AVIR_A_OBJS) \ - $(foreach x,$(THIRD_PARTY_AVIR_A_DIRECTDEPS),$($(x)_A).pkg) - -$(THIRD_PARTY_AVIR_A): \ - third_party/avir/ \ - $(THIRD_PARTY_AVIR_A).pkg \ - $(THIRD_PARTY_AVIR_A_OBJS) - -#o/$(MODE)/third_party/avir/lanczos1b.o: \ - CXX = clang++-10 - -o/$(MODE)/third_party/avir/lanczos1b.o \ -o/$(MODE)/third_party/avir/lanczos.o: \ - OVERRIDE_CXXFLAGS += \ - $(MATHEMATICAL) - -THIRD_PARTY_AVIR_A_DEPS := \ - $(call uniq,$(foreach x,$(THIRD_PARTY_AVIR_A_DIRECTDEPS),$($(x)))) - -THIRD_PARTY_AVIR_LIBS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x))) -THIRD_PARTY_AVIR_SRCS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_SRCS)) -THIRD_PARTY_AVIR_HDRS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_HDRS)) -THIRD_PARTY_AVIR_CHECKS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_CHECKS)) -THIRD_PARTY_AVIR_OBJS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_OBJS)) -THIRD_PARTY_AVIR_TESTS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_TESTS)) - -.PHONY: 
o/$(MODE)/third_party/avir -o/$(MODE)/third_party/avir: $(THIRD_PARTY_AVIR_A_CHECKS) diff --git a/third_party/avir/avir1.h b/third_party/avir/avir1.h deleted file mode 100644 index f803de03..00000000 --- a/third_party/avir/avir1.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_AVIR1_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_AVIR1_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -struct avir1 { - void *p; -}; - -void avir1init(struct avir1 *); -void avir1free(struct avir1 *); -void avir1(struct avir1 *, size_t, size_t, void *, size_t, size_t, size_t, - size_t, const void *, size_t); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_AVIR1_H_ */ diff --git a/third_party/avir/avir_dil.h b/third_party/avir/avir_dil.h deleted file mode 100644 index e7047312..00000000 --- a/third_party/avir/avir_dil.h +++ /dev/null @@ -1,1013 +0,0 @@ -/* clang-format off */ -//$ nobt -//$ nocpp - -/** - * @file avir_dil.h - * - * @brief Inclusion file for de-interleaved image resizing functions. - * - * This file includes the "CImageResizerFilterStepDIL" class which implements - * image resizing functions in de-interleaved mode. - * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - */ - -namespace avir { - -/** - * @brief De-interleaved filtering steps implementation class. - * - * This class implements scanline filtering functions in de-interleaved mode. - * This means that pixels are processed in groups. - * - * @tparam fptype Floating point type to use for storing pixel elements. - * SIMD types cannot be used. - * @tparam fptypesimd The SIMD type used to store a pack of "fptype" values. 
- */ - -template< class fptype, class fptypesimd > -class CImageResizerFilterStepDIL : - public CImageResizerFilterStep< fptype, fptype > -{ -public: - using CImageResizerFilterStep< fptype, fptype > :: IsUpsample; - using CImageResizerFilterStep< fptype, fptype > :: ResampleFactor; - using CImageResizerFilterStep< fptype, fptype > :: Flt; - using CImageResizerFilterStep< fptype, fptype > :: FltOrig; - using CImageResizerFilterStep< fptype, fptype > :: FltLatency; - using CImageResizerFilterStep< fptype, fptype > :: Vars; - using CImageResizerFilterStep< fptype, fptype > :: InLen; - using CImageResizerFilterStep< fptype, fptype > :: InPrefix; - using CImageResizerFilterStep< fptype, fptype > :: InSuffix; - using CImageResizerFilterStep< fptype, fptype > :: InElIncr; - using CImageResizerFilterStep< fptype, fptype > :: OutLen; - using CImageResizerFilterStep< fptype, fptype > :: OutPrefix; - using CImageResizerFilterStep< fptype, fptype > :: OutSuffix; - using CImageResizerFilterStep< fptype, fptype > :: OutElIncr; - using CImageResizerFilterStep< fptype, fptype > :: PrefixDC; - using CImageResizerFilterStep< fptype, fptype > :: SuffixDC; - using CImageResizerFilterStep< fptype, fptype > :: RPosBuf; - using CImageResizerFilterStep< fptype, fptype > :: FltBank; - using CImageResizerFilterStep< fptype, fptype > :: EdgePixelCount; - - /** - * Function performs "packing" (de-interleaving) of a scanline and type - * conversion. If required, the sRGB gamma correction is applied. - * - * @param ip0 Input scanline, pixel elements interleaved. - * @param op0 Output scanline, pixel elements are grouped, "l" elements - * apart. - * @param l The number of pixels to "pack". 
- */ - - template< class Tin > - void packScanline( const Tin* const ip0, fptype* const op0, - const int l ) const - { - const int ElCount = Vars -> ElCount; - int j; - - if( !Vars -> UseSRGBGamma ) - { - for( j = 0; j < ElCount; j++ ) - { - const Tin* ip = ip0 + j; - fptype* const op = op0 + j * InElIncr; - int i; - - for( i = 0; i < l; i++ ) - { - op[ i ] = (fptype) *ip; - ip += ElCount; - } - } - } - else - { - const fptype gm = (fptype) Vars -> InGammaMult; - - for( j = 0; j < ElCount; j++ ) - { - const Tin* ip = ip0 + j; - fptype* const op = op0 + j * InElIncr; - int i; - - for( i = 0; i < l; i++ ) - { - op[ i ] = convertSRGB2Lin( (fptype) *ip * gm ); - ip += ElCount; - } - } - } - } - - /** - * Function applies Linear to sRGB gamma correction to the specified - * scanline. - * - * @param p Scanline. - * @param l The number of pixels to de-linearize. - * @param Vars0 Image resizing-related variables. - */ - - static void applySRGBGamma( fptype* const p0, const int l, - const CImageResizerVars& Vars0 ) - { - const int ElCount = Vars0.ElCount; - const fptype gm = (fptype) Vars0.OutGammaMult; - int j; - - for( j = 0; j < ElCount; j++ ) - { - fptype* const p = p0 + j * l; - int i; - - for( i = 0; i < l; i++ ) - { - p[ i ] = convertLin2SRGB( p[ i ]) * gm; - } - } - } - - /** - * Function converts vertical scanline to horizontal scanline. This - * function is called by the image resizer when image is resized - * vertically. This means that the vertical scanline is stored in the - * same format produced by the packScanline() and maintained by other - * filtering functions. - * - * @param ip Input vertical scanline, pixel elements are grouped, SrcLen - * elements apart. - * @param op Output buffer (temporary buffer used during resizing), pixel - * elements are grouped, "l" elements apart. - * @param SrcLen The number of pixels in the input scanline, also used to - * calculate input buffer increment. - * @param SrcIncr Input buffer increment to the next vertical pixel. 
- */ - - void convertVtoH( const fptype* ip, fptype* op, const int SrcLen, - const int SrcIncr ) const - { - const int ElCount = Vars -> ElCount; - const int SrcElIncr = SrcIncr / ElCount; - const int ips1 = SrcElIncr; - const int ips2 = SrcElIncr * 2; - const int ips3 = SrcElIncr * 3; - const int ops1 = InElIncr; - const int ops2 = InElIncr * 2; - const int ops3 = InElIncr * 3; - int j; - - if( ElCount == 1 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - ip += SrcIncr; - op++; - } - } - else - if( ElCount == 4 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ ops1 ] = ip[ ips1 ]; - op[ ops2 ] = ip[ ips2 ]; - op[ ops3 ] = ip[ ips3 ]; - ip += SrcIncr; - op++; - } - } - else - if( ElCount == 3 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ ops1 ] = ip[ ips1 ]; - op[ ops2 ] = ip[ ips2 ]; - ip += SrcIncr; - op++; - } - } - else - if( ElCount == 2 ) - { - for( j = 0; j < SrcLen; j++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ ops1 ] = ip[ ips1 ]; - ip += SrcIncr; - op++; - } - } - } - - /** - * Function performs "unpacking" of a scanline and type conversion - * (truncation is used when floating point is converted to integer). - * The unpacking function assumes that scanline is stored in the style - * produced by the packScanline() function. - * - * @param ip0 Input scanline, pixel elements are grouped, "l" elements - * apart. - * @param op0 Output scanline, pixel elements are interleaved. - * @param l The number of pixels to "unpack". - * @param Vars0 Image resizing-related variables. ElCount is assumed to be - * equal to ElCountIO. 
- */ - - template< class Tout > - static void unpackScanline( const fptype* const ip0, Tout* const op0, - const int l, const CImageResizerVars& Vars0 ) - { - const int ElCount = Vars0.ElCount; - int j; - - for( j = 0; j < ElCount; j++ ) - { - const fptype* const ip = ip0 + j * l; - Tout* op = op0 + j; - int i; - - for( i = 0; i < l; i++ ) - { - *op = (Tout) ip[ i ]; - op += ElCount; - } - } - } - - /** - * Function prepares input scanline buffer for *this filtering step. - * Left- and right-most pixels are replicated to make sure no buffer - * overrun happens. Such approach also allows to bypass any pointer - * range checks. - * - * @param Src Source buffer. - */ - - void prepareInBuf( fptype* Src ) const - { - if( IsUpsample || InPrefix + InSuffix == 0 ) - { - return; - } - - int j; - - for( j = 0; j < Vars -> ElCount; j++ ) - { - replicateArray( Src, 1, Src - InPrefix, InPrefix, 1 ); - fptype* const Src2 = Src + InLen - 1; - replicateArray( Src2, 1, Src2 + 1, InSuffix, 1 ); - Src += InElIncr; - } - } - - /** - * Function peforms scanline upsampling with filtering. - * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to ElCount. - * @param Dst Destination scanline buffer. - */ - - void doUpsample( const fptype* Src, fptype* Dst ) const - { - const int elalign = Vars -> elalign; - const int opstep = ResampleFactor; - const fptype* const f = Flt; - const int flen = Flt.getCapacity(); - int l; - int i; - int j; - - for( j = 0; j < Vars -> ElCount; j++ ) - { - const fptype* ip = Src; - fptype* op0 = &Dst[ -OutPrefix ]; - memset( op0, 0, ( OutPrefix + OutLen + OutSuffix ) * - sizeof( fptype )); - - if( FltOrig.getCapacity() > 0 ) - { - // Do not perform filtering, only upsample. 
- - op0 += OutPrefix % ResampleFactor; - l = OutPrefix / ResampleFactor; - - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0 += opstep; - ip++; - l--; - } - - l = OutSuffix / ResampleFactor; - - while( l >= 0 ) - { - op0[ 0 ] = ip[ 0 ]; - op0 += opstep; - l--; - } - - Src += InElIncr; - Dst += OutElIncr; - continue; - } - - l = InPrefix; - fptypesimd ipv = (fptypesimd) ip[ 0 ]; - - while( l > 0 ) - { - for( i = 0; i < flen; i += elalign ) - { - fptypesimd :: addu( op0 + i, - fptypesimd :: load( f + i ) * ipv ); - } - - op0 += opstep; - l--; - } - - l = InLen - 1; - - while( l > 0 ) - { - ipv = (fptypesimd) ip[ 0 ]; - - for( i = 0; i < flen; i += elalign ) - { - fptypesimd :: addu( op0 + i, - fptypesimd :: load( f + i ) * ipv ); - } - - ip++; - op0 += opstep; - l--; - } - - l = InSuffix; - ipv = (fptypesimd) ip[ 0 ]; - - while( l >= 0 ) - { - for( i = 0; i < flen; i += elalign ) - { - fptypesimd :: addu( op0 + i, - fptypesimd :: load( f + i ) * ipv ); - } - - op0 += opstep; - l--; - } - - const fptype* dc = SuffixDC; - l = SuffixDC.getCapacity(); - - for( i = 0; i < l; i += elalign ) - { - fptypesimd :: addu( op0 + i, - fptypesimd :: load( dc + i ) * ipv ); - } - - ipv = (fptypesimd) Src[ 0 ]; - op0 = Dst - InPrefix * opstep; - dc = PrefixDC; - l = PrefixDC.getCapacity(); - - for( i = 0; i < l; i += elalign ) - { - fptypesimd :: addu( op0 + i, - fptypesimd :: load( dc + i ) * ipv ); - } - - Src += InElIncr; - Dst += OutElIncr; - } - } - - /** - * Function peforms scanline filtering with optional downsampling. - * Function makes use of the symmetry of the filter. - * - * @param Src Source scanline buffer (length = this -> InLen). Source - * scanline increment will be equal to 1. - * @param Dst Destination scanline buffer. - * @param DstIncr Destination scanline buffer increment, used for - * horizontal or vertical scanline stepping. 
- */ - - void doFilter( const fptype* const Src, fptype* Dst, - const int DstIncr ) const - { - const int ElCount = Vars -> ElCount; - const int elalign = Vars -> elalign; - const fptype* const f = &Flt[ 0 ]; - const int flen = Flt.getCapacity(); - const int ipstep = ResampleFactor; - int i; - int j; - - if( ElCount == 1 ) - { - const fptype* ip = Src - EdgePixelCount * ipstep - FltLatency; - fptype* op = Dst; - int l = OutLen; - - while( l > 0 ) - { - fptypesimd s = fptypesimd :: load( f ) * - fptypesimd :: loadu( ip ); - - for( i = elalign; i < flen; i += elalign ) - { - s += fptypesimd :: load( f + i ) * - fptypesimd :: loadu( ip + i ); - } - - op[ 0 ] = s.hadd(); - op += DstIncr; - ip += ipstep; - l--; - } - } - else - if( DstIncr == 1 ) - { - for( j = 0; j < ElCount; j++ ) - { - const fptype* ip = Src - EdgePixelCount * ipstep - - FltLatency + j * InElIncr; - - fptype* op = Dst + j * OutElIncr; - int l = OutLen; - - while( l > 0 ) - { - fptypesimd s = fptypesimd :: load( f ) * - fptypesimd :: loadu( ip ); - - for( i = elalign; i < flen; i += elalign ) - { - s += fptypesimd :: load( f + i ) * - fptypesimd :: loadu( ip + i ); - } - - op[ 0 ] = s.hadd(); - op += DstIncr; - ip += ipstep; - l--; - } - } - } - else - { - const fptype* ip0 = Src - EdgePixelCount * ipstep - FltLatency; - fptype* op0 = Dst; - int l = OutLen; - - while( l > 0 ) - { - const fptype* ip = ip0; - fptype* op = op0; - - for( j = 0; j < ElCount; j++ ) - { - fptypesimd s = fptypesimd :: load( f ) * - fptypesimd :: loadu( ip ); - - for( i = elalign; i < flen; i += elalign ) - { - s += fptypesimd :: load( f + i ) * - fptypesimd :: loadu( ip + i ); - } - - op[ 0 ] = s.hadd(); - ip += InElIncr; - op += OutElIncr; - } - - ip0 += ipstep; - op0 += DstIncr; - l--; - } - } - } - - /** - * Function performs resizing of a single scanline. This function does - * not "know" about the length of the source scanline buffer. 
This buffer - * should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is - * always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 ) - * does not exceed source scanline's buffer length. SrcLine's increment is - * assumed to be equal to 1. - * - * @param SrcLine Source scanline buffer. - * @param DstLine Destination (resized) scanline buffer. - * @param DstLineIncr Destination scanline position increment, used for - * horizontal or vertical scanline stepping. - * @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be - * aligned by fpclass :: fpalign. - */ - - void doResize( const fptype* SrcLine, fptype* DstLine, - int DstLineIncr, fptype* const xx ) const - { - const int IntFltLen = FltBank -> getFilterLen(); - const int ElCount = Vars -> ElCount; - const int elalign = Vars -> elalign; - const typename CImageResizerFilterStep< fptype, fptype > :: - CResizePos* rpos = &(*RPosBuf)[ 0 ]; - - int DstLineLen = OutLen; - int i; - int j; - -#define AVIR_RESIZE_PART1 \ - while( DstLineLen > 0 ) \ - { \ - const fptypesimd x = (fptypesimd) rpos -> x; \ - const fptype* ftp = rpos -> ftp; \ - const fptype* ftp2 = rpos -> ftp + IntFltLen; \ - const fptype* Src = SrcLine + rpos -> SrcOffs; - -#define AVIR_RESIZE_PART1nx \ - while( DstLineLen > 0 ) \ - { \ - const fptype* ftp = rpos -> ftp; \ - const fptype* Src = SrcLine + rpos -> SrcOffs; - -#define AVIR_RESIZE_PART2 \ - DstLine += DstLineIncr; \ - rpos++; \ - DstLineLen--; \ - } - - if( ElCount == 1 ) - { - if( FltBank -> getOrder() == 1 ) - { - AVIR_RESIZE_PART1 - - fptypesimd sum = ( fptypesimd :: load( ftp ) + - fptypesimd :: load( ftp2 ) * x ) * - fptypesimd :: loadu( Src ); - - for( i = elalign; i < IntFltLen; i += elalign ) - { - sum += ( fptypesimd :: load( ftp + i ) + - fptypesimd :: load( ftp2 + i ) * x ) * - fptypesimd :: loadu( Src + i ); - } - - DstLine[ 0 ] = sum.hadd(); - - AVIR_RESIZE_PART2 - } - else - { - AVIR_RESIZE_PART1nx - - fptypesimd sum = fptypesimd :: load( 
ftp ) * - fptypesimd :: loadu( Src ); - - for( i = elalign; i < IntFltLen; i += elalign ) - { - sum += fptypesimd :: load( ftp + i ) * - fptypesimd :: loadu( Src + i ); - } - - DstLine[ 0 ] = sum.hadd(); - - AVIR_RESIZE_PART2 - } - } - else - if( DstLineIncr == 1 ) - { - // Horizontal-oriented processing, element loop is outer. - - const int SrcIncr = InElIncr; - const int DstLineElIncr = OutElIncr - DstLineIncr * DstLineLen; - - if( FltBank -> getOrder() == 1 ) - { - for( j = 0; j < ElCount; j++ ) - { - AVIR_RESIZE_PART1 - - fptypesimd sum = 0.0; - - for( i = 0; i < IntFltLen; i += elalign ) - { - sum += ( fptypesimd :: load( ftp + i ) + - fptypesimd :: load( ftp2 + i ) * x ) * - fptypesimd :: loadu( Src + i ); - } - - DstLine[ 0 ] = sum.hadd(); - - AVIR_RESIZE_PART2 - - DstLine += DstLineElIncr; - SrcLine += SrcIncr; - DstLineLen = OutLen; - rpos = &(*RPosBuf)[ 0 ]; - } - } - else - { - for( j = 0; j < ElCount; j++ ) - { - AVIR_RESIZE_PART1nx - - fptypesimd sum = fptypesimd :: load( ftp ) * - fptypesimd :: loadu( Src ); - - for( i = elalign; i < IntFltLen; i += elalign ) - { - sum += fptypesimd :: load( ftp + i ) * - fptypesimd :: loadu( Src + i ); - } - - DstLine[ 0 ] = sum.hadd(); - - AVIR_RESIZE_PART2 - - DstLine += DstLineElIncr; - SrcLine += SrcIncr; - DstLineLen = OutLen; - rpos = &(*RPosBuf)[ 0 ]; - } - } - } - else - { - const int SrcIncr = InElIncr; - const int DstLineElIncr = OutElIncr; - DstLineIncr -= DstLineElIncr * ElCount; - - if( FltBank -> getOrder() == 1 ) - { - AVIR_RESIZE_PART1 - - for( i = 0; i < IntFltLen; i += elalign ) - { - ( fptypesimd :: load( ftp + i ) + - fptypesimd :: load( ftp2 + i ) * x ).store( xx + i ); - } - - for( j = 0; j < ElCount; j++ ) - { - fptypesimd sum = fptypesimd :: load( xx ) * - fptypesimd :: loadu( Src ); - - for( i = elalign; i < IntFltLen; i += elalign ) - { - sum += fptypesimd :: load( xx + i ) * - fptypesimd :: loadu( Src + i ); - } - - DstLine[ 0 ] = sum.hadd(); - DstLine += DstLineElIncr; - Src += SrcIncr; - 
} - - AVIR_RESIZE_PART2 - } - else - { - AVIR_RESIZE_PART1nx - - for( j = 0; j < ElCount; j++ ) - { - fptypesimd sum = fptypesimd :: load( ftp ) * - fptypesimd :: loadu( Src ); - - for( i = elalign; i < IntFltLen; i += elalign ) - { - sum += fptypesimd :: load( ftp + i ) * - fptypesimd :: loadu( Src + i ); - } - - DstLine[ 0 ] = sum.hadd(); - DstLine += DstLineElIncr; - Src += SrcIncr; - } - - AVIR_RESIZE_PART2 - } - } - -#undef AVIR_RESIZE_PART2 -#undef AVIR_RESIZE_PART1nx -#undef AVIR_RESIZE_PART1 - } -}; - -/** - * @brief Image resizer's default de-interleaved dithering class. - * - * This class defines an object that performs rounding, clipping and dithering - * operations over horizontal scanline pixels before scanline is stored in the - * output buffer. - * - * This ditherer implementation uses de-interleaved SIMD algorithm. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types cannot be used. - * @tparam fptypesimd The SIMD type used to store a pack of "fptype" values. - */ - -template< class fptype, class fptypesimd > -class CImageResizerDithererDefDIL -{ -public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. - * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. - */ - - void init( const int aLen, const CImageResizerVars& aVars, - const double aTrMul, const double aPkOut ) - { - Len = aLen; - Vars = &aVars; - LenE = aLen * Vars -> ElCount; - TrMul0 = aTrMul; - PkOut0 = aPkOut; - } - - /** - * @return "True" if dithering is recursive relative to scanlines meaning - * multi-threaded execution is not supported by this dithering method. - */ - - static bool isRecursive() - { - return( false ); - } - - /** - * Function performs rounding and clipping operations. - * - * @param ResScanline The buffer containing the final scanline. 
- */ - - void dither( fptype* const ResScanline ) const - { - const int elalign = Vars -> elalign; - const fptypesimd c0 = 0.0; - const fptypesimd PkOut = (fptypesimd) PkOut0; - int j; - - if( TrMul0 == 1.0 ) - { - // Optimization - do not perform bit truncation. - - for( j = 0; j < LenE - elalign; j += elalign ) - { - const fptypesimd z0 = round( - fptypesimd :: loadu( ResScanline + j )); - - clamp( z0, c0, PkOut ).storeu( ResScanline + j ); - } - - const int lim = LenE - j; - const fptypesimd z0 = round( - fptypesimd :: loadu( ResScanline + j, lim )); - - clamp( z0, c0, PkOut ).storeu( ResScanline + j, lim ); - } - else - { - const fptypesimd TrMul = (fptypesimd) TrMul0; - - for( j = 0; j < LenE - elalign; j += elalign ) - { - const fptypesimd z0 = round( - fptypesimd :: loadu( ResScanline + j ) / TrMul ) * TrMul; - - clamp( z0, c0, PkOut ).storeu( ResScanline + j ); - } - - const int lim = LenE - j; - const fptypesimd z0 = round( - fptypesimd :: loadu( ResScanline + j, lim ) / TrMul ) * TrMul; - - clamp( z0, c0, PkOut ).storeu( ResScanline + j, lim ); - } - } - -protected: - int Len; ///< Scanline's length in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int LenE; ///< = LenE * ElCount. - ///< - double TrMul0; ///< Bit-depth truncation multiplier. - ///< - double PkOut0; ///< Peak output value allowed. - ///< -}; - -/** - * @brief Image resizer's error-diffusion dithering class, de-interleaved - * mode. - * - * This ditherer implements error-diffusion dithering which looks good, and - * whose results are compressed by PNG well. - * - * @tparam fptype Floating point type to use for storing pixel data. SIMD - * types cannot be used. - * @tparam fptypesimd Processing type, SIMD can be used. - */ - -template< class fptype, class fptypesimd > -class CImageResizerDithererErrdDIL -{ -public: - /** - * Function initializes the ditherer object. - * - * @param aLen Scanline length in pixels to process. 
- * @param aVars Image resizing-related variables. - * @param aTrMul Bit-depth truncation multiplier. 1 - no additional - * truncation. - * @param aPkOut Peak output value allowed. - */ - - void init( const int aLen, const CImageResizerVars& aVars, - const double aTrMul, const double aPkOut ) - { - Len = aLen; - Vars = &aVars; - LenE = aLen * Vars -> ElCount; - TrMul0 = aTrMul; - PkOut0 = aPkOut; - - ResScanlineDith0.alloc( LenE + Vars -> ElCount, sizeof( fptype )); - ResScanlineDith = ResScanlineDith0 + Vars -> ElCount; - int i; - - for( i = 0; i < LenE + Vars -> ElCount; i++ ) - { - ResScanlineDith0[ i ] = 0.0; - } - } - - static bool isRecursive() - { - return( true ); - } - - void dither( fptype* const ResScanline ) - { - const int ea = Vars -> elalign; - const fptypesimd c0 = 0.0; - const fptypesimd TrMul = (fptypesimd) TrMul0; - const fptypesimd PkOut = (fptypesimd) PkOut0; - int j; - - for( j = 0; j < LenE - ea; j += ea ) - { - fptypesimd :: addu( ResScanline + j, - fptypesimd :: loadu( ResScanlineDith + j )); - - c0.storeu( ResScanlineDith + j ); - } - - int lim = LenE - j; - fptypesimd :: addu( ResScanline + j, - fptypesimd :: loadu( ResScanlineDith + j, lim ), lim ); - - c0.storeu( ResScanlineDith + j, lim ); - - const int Len1 = Len - 1; - fptype* rs = ResScanline; - fptype* rsd = ResScanlineDith; - int i; - - for( i = 0; i < Vars -> ElCount; i++ ) - { - for( j = 0; j < Len1; j++ ) - { - // Perform rounding, noise estimation and saturation. - - fptype* const rsj = rs + j; - const fptype z0 = round( rsj[ 0 ] / TrMul ) * TrMul; - const fptype Noise = rsj[ 0 ] - z0; - rsj[ 0 ] = clamp( z0, (fptype) 0.0, PkOut ); - - fptype* const rsdj = rsd + j; - rsj[ 1 ] += Noise * (fptype) 0.364842; - rsdj[ -1 ] += Noise * (fptype) 0.207305; - rsdj[ 0 ] += Noise * (fptype) 0.364842; - rsdj[ 1 ] += Noise * (fptype) 0.063011; - } - - // Process the last pixel element in scanline. 
- - const fptype z1 = round( rs[ Len1 ] / TrMul ) * TrMul; - const fptype Noise2 = rs[ Len1 ] - z1; - rs[ Len1 ] = clamp( z1, c0, PkOut ); - - rsd[ Len1 - 1 ] += Noise2 * (fptype) 0.207305; - rsd[ Len1 ] += Noise2 * (fptype) 0.364842; - - rs += Len; - rsd += Len; - } - } - -protected: - int Len; ///< Scanline's length in pixels. - ///< - const CImageResizerVars* Vars; ///< Image resizing-related variables. - ///< - int LenE; ///< = LenE * ElCount. - ///< - double TrMul0; ///< Bit-depth truncation multiplier. - ///< - double PkOut0; ///< Peak output value allowed. - ///< - CBuffer< fptype > ResScanlineDith0; ///< Error propagation buffer for - ///< dithering, first pixel unused. - ///< - fptype* ResScanlineDith; ///< Error propagation buffer pointer which skips - ///< the first ElCount elements. - ///< -}; - -/** - * @brief Floating-point processing definition and abstraction class for - * de-interleaved processing. - * - * This class defines several constants and typedefs that point to classes - * that should be used by the image resizing algorithm. This implementation - * points to de-interleaved processing classes. - * - * @tparam afptype Floating point type to use for storing intermediate data - * and variables. SIMD types should not be used. - * @tparam afptypesimd SIMD type used to perform processing. - * @tparam adith Ditherer class to use during processing. - */ - -template< class afptype, class afptypesimd, - class adith = CImageResizerDithererDefDIL< afptype, afptypesimd > > -class fpclass_def_dil -{ -public: - typedef afptype fptype; ///< Floating-point type to use during processing. - ///< - typedef afptype fptypeatom; ///< Atomic type "fptype" consists of. - ///< - static const int fppack = 1; ///< The number of atomic types stored in a - ///< single "fptype" element. - ///< - static const int fpalign = sizeof( afptypesimd ); ///< Suggested alignment - ///< size in bytes. 
This is not a required alignment, because image - ///< resizing algorithm cannot be made to have a strictly aligned data - ///< access in all cases (e.g. de-interleaved interpolation cannot - ///< perform aligned accesses). - ///< - static const int elalign = sizeof( afptypesimd ) / sizeof( afptype ); ///< - ///< Length alignment of arrays of elements. This applies to filters - ///< and intermediate buffers: this constant forces filters and - ///< scanlines to have a length which is a multiple of this value, for - ///< more efficient SIMD implementation. - ///< - static const int packmode = 1; ///< 0 if interleaved packing, 1 if - ///< de-interleaved. - ///< - typedef CImageResizerFilterStepDIL< fptype, afptypesimd > CFilterStep; ///< - ///< Filtering step class to use during processing. - ///< - typedef adith CDitherer; ///< Ditherer class to use during processing. - ///< -}; - -} // namespace avir diff --git a/third_party/avir/avir_float4_sse.h b/third_party/avir/avir_float4_sse.h deleted file mode 100644 index 143d8f89..00000000 --- a/third_party/avir/avir_float4_sse.h +++ /dev/null @@ -1,324 +0,0 @@ -/* clang-format off */ -//$ nobt -//$ nocpp - -/** - * @file avir_float4_sse.h - * - * @brief Inclusion file for the "float4" type. - * - * This file includes the "float4" SSE-based type used for SIMD variable - * storage and processing. - * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - */ - -#ifndef AVIR_FLOAT4_SSE_INCLUDED -#define AVIR_FLOAT4_SSE_INCLUDED - -#include "third_party/avir/avir.h" -#include "libc/bits/mmintrin.internal.h" -#include "libc/bits/xmmintrin.internal.h" -#include "libc/bits/xmmintrin.internal.h" -#include "libc/bits/xmmintrin.internal.h" -#include "libc/bits/emmintrin.internal.h" - -namespace avir { - -/** - * @brief SIMD packed 4-float type. - * - * This class implements a packed 4-float type that can be used to perform - * parallel computation using SIMD instructions on SSE-enabled processors. 
- * This class can be used as the "fptype" argument of the avir::fpclass_def - * class. - */ - -class float4 -{ -public: - float4() - { - } - - float4( const float4& s ) - : value( s.value ) - { - } - - float4( const __m128 s ) - : value( s ) - { - } - - float4( const float s ) - : value( _mm_set1_ps( s )) - { - } - - float4& operator = ( const float4& s ) - { - value = s.value; - return( *this ); - } - - float4& operator = ( const __m128 s ) - { - value = s; - return( *this ); - } - - float4& operator = ( const float s ) - { - value = _mm_set1_ps( s ); - return( *this ); - } - - operator float () const - { - return( _mm_cvtss_f32( value )); - } - - /** - * @param p Pointer to memory from where the value should be loaded, - * should be 16-byte aligned. - * @return float4 value loaded from the specified memory location. - */ - - static float4 load( const float* const p ) - { - return( _mm_load_ps( p )); - } - - /** - * @param p Pointer to memory from where the value should be loaded, - * may have any alignment. - * @return float4 value loaded from the specified memory location. - */ - - static float4 loadu( const float* const p ) - { - return( _mm_loadu_ps( p )); - } - - /** - * @param p Pointer to memory from where the value should be loaded, - * may have any alignment. - * @param lim The maximum number of elements to load, >0. - * @return float4 value loaded from the specified memory location, with - * elements beyond "lim" set to 0. - */ - - static float4 loadu( const float* const p, int lim ) - { - if( lim > 2 ) - { - if( lim > 3 ) - { - return( _mm_loadu_ps( p )); - } - else - { - return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ])); - } - } - else - { - if( lim == 2 ) - { - return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ])); - } - else - { - return( _mm_load_ss( p )); - } - } - } - - /** - * Function stores *this value to the specified memory location. - * - * @param[out] p Output memory location, should be 16-byte aligned. 
- */ - - void store( float* const p ) const - { - _mm_store_ps( p, value ); - } - - /** - * Function stores *this value to the specified memory location. - * - * @param[out] p Output memory location, may have any alignment. - */ - - void storeu( float* const p ) const - { - _mm_storeu_ps( p, value ); - } - - /** - * Function stores "lim" lower elements of *this value to the specified - * memory location. - * - * @param[out] p Output memory location, may have any alignment. - * @param lim The number of lower elements to store, >0. - */ - - void storeu( float* const p, int lim ) const - { - if( lim > 2 ) - { - if( lim > 3 ) - { - _mm_storeu_ps( p, value ); - } - else - { - _mm_storel_pi( (__m64*) p, value ); - _mm_store_ss( p + 2, _mm_movehl_ps( value, value )); - } - } - else - { - if( lim == 2 ) - { - _mm_storel_pi( (__m64*) p, value ); - } - else - { - _mm_store_ss( p, value ); - } - } - } - - float4& operator += ( const float4& s ) - { - value = _mm_add_ps( value, s.value ); - return( *this ); - } - - float4& operator -= ( const float4& s ) - { - value = _mm_sub_ps( value, s.value ); - return( *this ); - } - - float4& operator *= ( const float4& s ) - { - value = _mm_mul_ps( value, s.value ); - return( *this ); - } - - float4& operator /= ( const float4& s ) - { - value = _mm_div_ps( value, s.value ); - return( *this ); - } - - float4 operator + ( const float4& s ) const - { - return( _mm_add_ps( value, s.value )); - } - - float4 operator - ( const float4& s ) const - { - return( _mm_sub_ps( value, s.value )); - } - - float4 operator * ( const float4& s ) const - { - return( _mm_mul_ps( value, s.value )); - } - - float4 operator / ( const float4& s ) const - { - return( _mm_div_ps( value, s.value )); - } - - /** - * @return Horizontal sum of elements. 
- */ - - float hadd() const - { - const __m128 v = _mm_add_ps( value, _mm_movehl_ps( value, value )); - const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 )); - return( _mm_cvtss_f32( res )); - } - - /** - * Function performs in-place addition of a value located in memory and - * the specified value. - * - * @param p Pointer to value where addition happens. May be unaligned. - * @param v Value to add. - */ - - static void addu( float* const p, const float4& v ) - { - ( loadu( p ) + v ).storeu( p ); - } - - /** - * Function performs in-place addition of a value located in memory and - * the specified value. Limited to the specfied number of elements. - * - * @param p Pointer to value where addition happens. May be unaligned. - * @param v Value to add. - * @param lim The element number limit, >0. - */ - - static void addu( float* const p, const float4& v, const int lim ) - { - ( loadu( p, lim ) + v ).storeu( p, lim ); - } - - __m128 value; ///< Packed value of 4 floats. - ///< -}; - -/** - * SIMD rounding function, exact result. - * - * @param v Value to round. - * @return Rounded SIMD value. - */ - -inline float4 round( const float4& v ) -{ - unsigned int prevrm = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST ); - - const __m128 res = _mm_cvtepi32_ps( _mm_cvtps_epi32( v.value )); - - _MM_SET_ROUNDING_MODE( prevrm ); - - return( res ); -} - -/** - * SIMD function "clamps" (clips) the specified packed values so that they are - * not lesser than "minv", and not greater than "maxv". - * - * @param Value Value to clamp. - * @param minv Minimal allowed value. - * @param maxv Maximal allowed value. - * @return The clamped value. 
- */ - -inline float4 clamp( const float4& Value, const float4& minv, - const float4& maxv ) -{ - return( _mm_min_ps( _mm_max_ps( Value.value, minv.value ), maxv.value )); -} - -typedef fpclass_def< avir :: float4, float > fpclass_float4; ///< - ///< Class that can be used as the "fpclass" template parameter of the - ///< avir::CImageResizer class to perform calculation using default - ///< interleaved algorithm, using SIMD float4 type. - ///< - -} // namespace avir - -#endif // AVIR_FLOAT4_SSE_INCLUDED diff --git a/third_party/avir/avir_float8_avx.h b/third_party/avir/avir_float8_avx.h deleted file mode 100644 index 72834bfd..00000000 --- a/third_party/avir/avir_float8_avx.h +++ /dev/null @@ -1,365 +0,0 @@ -/* clang-format off */ -//$ nobt -//$ nocpp - -/** - * @file avir_float8_avx.h - * - * @brief Inclusion file for the "float8" type. - * - * This file includes the "float8" AVX-based type used for SIMD variable - * storage and processing. - * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - */ - -#ifndef AVIR_FLOAT8_AVX_INCLUDED -#define AVIR_FLOAT8_AVX_INCLUDED - -#include "libc/bits/mmintrin.internal.h" -#include "libc/bits/avxintrin.internal.h" -#include "libc/bits/smmintrin.internal.h" -#include "libc/bits/pmmintrin.internal.h" -#include "libc/bits/avx2intrin.internal.h" -#include "libc/bits/xmmintrin.internal.h" -#include "third_party/avir/avir_dil.h" - -namespace avir { - -/** - * @brief SIMD packed 8-float type. - * - * This class implements a packed 8-float type that can be used to perform - * parallel computation using SIMD instructions on AVX-enabled processors. - * This class can be used as the "fptype" argument of the avir::fpclass_def - * or avir::fpclass_def_dil class. 
- */ - -class float8 -{ -public: - float8() - { - } - - float8( const float8& s ) - : value( s.value ) - { - } - - float8( const __m256 s ) - : value( s ) - { - } - - float8( const float s ) - : value( _mm256_set1_ps( s )) - { - } - - float8& operator = ( const float8& s ) - { - value = s.value; - return( *this ); - } - - float8& operator = ( const __m256 s ) - { - value = s; - return( *this ); - } - - float8& operator = ( const float s ) - { - value = _mm256_set1_ps( s ); - return( *this ); - } - - operator float () const - { - return( _mm_cvtss_f32( _mm256_extractf128_ps( value, 0 ))); - } - - /** - * @param p Pointer to memory from where the value should be loaded, - * should be 32-byte aligned. - * @return float8 value loaded from the specified memory location. - */ - - static float8 load( const float* const p ) - { - return( _mm256_load_ps( p )); - } - - /** - * @param p Pointer to memory from where the value should be loaded, - * may have any alignment. - * @return float8 value loaded from the specified memory location. - */ - - static float8 loadu( const float* const p ) - { - return( _mm256_loadu_ps( p )); - } - - /** - * @param p Pointer to memory from where the value should be loaded, - * may have any alignment. - * @param lim The maximum number of elements to load, >0. - * @return float8 value loaded from the specified memory location, with - * elements beyond "lim" set to 0. - */ - - static float8 loadu( const float* const p, const int lim ) - { - __m128 lo; - __m128 hi; - - if( lim > 4 ) - { - lo = _mm_loadu_ps( p ); - hi = loadu4( p + 4, lim - 4 ); - } - else - { - lo = loadu4( p, lim ); - hi = _mm_setzero_ps(); - } - - return( _mm256_insertf128_ps( _mm256_castps128_ps256( lo ), hi, 1 )); - } - - /** - * Function stores *this value to the specified memory location. - * - * @param[out] p Output memory location, should be 32-byte aligned. 
- */ - - void store( float* const p ) const - { - _mm256_store_ps( p, value ); - } - - /** - * Function stores *this value to the specified memory location. - * - * @param[out] p Output memory location, may have any alignment. - */ - - void storeu( float* const p ) const - { - _mm256_storeu_ps( p, value ); - } - - /** - * Function stores "lim" lower elements of *this value to the specified - * memory location. - * - * @param[out] p Output memory location, may have any alignment. - * @param lim The number of lower elements to store, >0. - */ - - void storeu( float* p, int lim ) const - { - __m128 v; - - if( lim > 4 ) - { - _mm_storeu_ps( p, _mm256_extractf128_ps( value, 0 )); - v = _mm256_extractf128_ps( value, 1 ); - p += 4; - lim -= 4; - } - else - { - v = _mm256_extractf128_ps( value, 0 ); - } - - if( lim > 2 ) - { - if( lim > 3 ) - { - _mm_storeu_ps( p, v ); - } - else - { - _mm_storel_pi( (__m64*) p, v ); - _mm_store_ss( p + 2, _mm_movehl_ps( v, v )); - } - } - else - { - if( lim == 2 ) - { - _mm_storel_pi( (__m64*) p, v ); - } - else - { - _mm_store_ss( p, v ); - } - } - } - - float8& operator += ( const float8& s ) - { - value = _mm256_add_ps( value, s.value ); - return( *this ); - } - - float8& operator -= ( const float8& s ) - { - value = _mm256_sub_ps( value, s.value ); - return( *this ); - } - - float8& operator *= ( const float8& s ) - { - value = _mm256_mul_ps( value, s.value ); - return( *this ); - } - - float8& operator /= ( const float8& s ) - { - value = _mm256_div_ps( value, s.value ); - return( *this ); - } - - float8 operator + ( const float8& s ) const - { - return( _mm256_add_ps( value, s.value )); - } - - float8 operator - ( const float8& s ) const - { - return( _mm256_sub_ps( value, s.value )); - } - - float8 operator * ( const float8& s ) const - { - return( _mm256_mul_ps( value, s.value )); - } - - float8 operator / ( const float8& s ) const - { - return( _mm256_div_ps( value, s.value )); - } - - /** - * @return Horizontal sum of elements. 
- */ - - float hadd() const - { - __m128 v = _mm_add_ps( _mm256_extractf128_ps( value, 0 ), - _mm256_extractf128_ps( value, 1 )); - - v = _mm_hadd_ps( v, v ); - v = _mm_hadd_ps( v, v ); - return( _mm_cvtss_f32( v )); - } - - /** - * Function performs in-place addition of a value located in memory and - * the specified value. - * - * @param p Pointer to value where addition happens. May be unaligned. - * @param v Value to add. - */ - - static void addu( float* const p, const float8& v ) - { - ( loadu( p ) + v ).storeu( p ); - } - - /** - * Function performs in-place addition of a value located in memory and - * the specified value. Limited to the specfied number of elements. - * - * @param p Pointer to value where addition happens. May be unaligned. - * @param v Value to add. - * @param lim The element number limit, >0. - */ - - static void addu( float* const p, const float8& v, const int lim ) - { - ( loadu( p, lim ) + v ).storeu( p, lim ); - } - - __m256 value; ///< Packed value of 8 floats. - ///< - -private: - /** - * @param p Pointer to memory from where the value should be loaded, - * may have any alignment. - * @param lim The maximum number of elements to load, >0. - * @return __m128 value loaded from the specified memory location, with - * elements beyond "lim" set to 0. - */ - - static __m128 loadu4( const float* const p, const int lim ) - { - if( lim > 2 ) - { - if( lim > 3 ) - { - return( _mm_loadu_ps( p )); - } - else - { - return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ])); - } - } - else - { - if( lim == 2 ) - { - return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ])); - } - else - { - return( _mm_load_ss( p )); - } - } - } -}; - -/** - * SIMD rounding function, exact result. - * - * @param v Value to round. - * @return Rounded SIMD value. 
- */ - -inline float8 round( const float8& v ) -{ - return( _mm256_round_ps( v.value, - ( _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ))); -} - -/** - * SIMD function "clamps" (clips) the specified packed values so that they are - * not lesser than "minv", and not greater than "maxv". - * - * @param Value Value to clamp. - * @param minv Minimal allowed value. - * @param maxv Maximal allowed value. - * @return The clamped value. - */ - -inline float8 clamp( const float8& Value, const float8& minv, - const float8& maxv ) -{ - return( _mm256_min_ps( _mm256_max_ps( Value.value, minv.value ), - maxv.value )); -} - -typedef fpclass_def_dil< float, avir :: float8 > fpclass_float8_dil; ///< - ///< Class that can be used as the "fpclass" template parameter of the - ///< avir::CImageResizer class to perform calculation using - ///< de-interleaved SIMD algorithm, using SIMD float8 type. - ///< - -} // namespace avir - -#endif // AVIR_FLOAT8_AVX_INCLUDED diff --git a/third_party/avir/lancir.h b/third_party/avir/lancir.h deleted file mode 100644 index 22055fcb..00000000 --- a/third_party/avir/lancir.h +++ /dev/null @@ -1,1494 +0,0 @@ -// clang-format off -//$ nobt -//$ nocpp - -/** - * @file lancir.h - * - * @brief The self-contained "lancir" inclusion file. - * - * This is the self-contained inclusion file for the "LANCIR" image resizer, - * part of the AVIR library. 
- * - * AVIR Copyright (c) 2015-2019 Aleksey Vaneev - * - * @mainpage - * - * @section intro_sec Introduction - * - * Description is available at https://github.com/avaneev/avir - * - * @section license License - * - * AVIR License Agreement - * - * The MIT License (MIT) - * - * Copyright (c) 2015-2019 Aleksey Vaneev - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef AVIR_CLANCIR_INCLUDED -#define AVIR_CLANCIR_INCLUDED - -#include "third_party/avir/notice.h" -#include "libc/str/str.h" -#include "libc/log/log.h" -#include "libc/mem/mem.h" -#include "libc/log/check.h" -#include "libc/macros.h" -#include "libc/math.h" - -namespace avir { - -/** - * The macro equals to "pi" constant, fills 53-bit floating point mantissa. - * Undefined at the end of file. - */ - -#define LANCIR_PI 3.1415926535897932 - -/** - * @brief LANCIR image resizer class. 
- * - * The object of this class can be used to resize 1-4 channel images to any - * required size. Resizing is performed by utilizing Lanczos filters, with - * 8-bit precision. This class offers a kind of "optimal" Lanczos resampling - * implementation. - * - * Object of this class can be allocated on stack. - * - * Note that object of this class does not free temporary buffers and - * variables after the resizeImage() call (until object's destruction), these - * buffers are reused on subsequent calls making batch resizing of same-size - * images faster. This means resizing is not thread-safe: a separate object - * should be created for each thread. - */ - -class CLancIR -{ -private: - CLancIR( const CLancIR& ) - { - // Unsupported. - } - - CLancIR& operator = ( const CLancIR& ) - { - // Unsupported. - return( *this ); - } - -public: - CLancIR() - : FltBuf( NULL ) - , FltBufLen( 0 ) - , spv( NULL ) - , spvlen( 0 ) - { - } - - ~CLancIR() - { - delete[] FltBuf; - delete[] spv; - } - - /** - * Function resizes image. - * - * @param SrcBuf Source image buffer. - * @param SrcWidth Source image width. - * @param SrcHeight Source image height. - * @param SrcScanlineSize Physical size of source scanline in elements - * (not bytes). If this value is below 1, SrcWidth * ElCount will be - * used as the physical source scanline size. - * @param[out] NewBuf Buffer to accept the resized image. Can be equal to - * SrcBuf if the size of the resized image is smaller or equal to source - * image in size. - * @param NewWidth New image width. - * @param NewHeight New image height. - * @param ElCount The number of elements (channels) used to store each - * source and destination pixel (1-4). - * @param kx0 Resizing step - horizontal (one output pixel corresponds to - * "k" input pixels). A downsizing factor if > 1.0; upsizing factor - * if <= 1.0. Multiply by -1 if you would like to bypass "ox" and "oy" - * adjustment which is done by default to produce a centered image. 
If - * step value equals 0, the step value will be chosen automatically. - * @param ky0 Resizing step - vertical. Same as "kx". - * @param ox Start X pixel offset within source image (can be negative). - * Positive offset moves the image to the left. - * @param oy Start Y pixel offset within source image (can be negative). - * Positive offset moves the image to the top. - * @tparam T Input and output buffer element's type. Can be uint8_t - * (0-255 value range), uint16_t (0-65535 value range), float - * (any value range), double (any value range). Larger integer types are - * treated as uint16_t. Signed integer types are unsupported. - */ - - template< class T > - void resizeImage( const T* const SrcBuf, const int SrcWidth, - const int SrcHeight, int SrcScanlineSize, T* const NewBuf, - const int NewWidth, const int NewHeight, const int ElCount, - const double kx0 = 0.0, const double ky0 = 0.0, double ox = 0.0, - double oy = 0.0 ) - { - if( NewWidth <= 0 || NewHeight <= 0 ) - { - return; - } - - if( SrcWidth <= 0 || SrcHeight <= 0 ) - { - handleEmptySrcCornerCase( NewBuf, (size_t) NewWidth * NewHeight * sizeof( T ) ); - return; - } - - const double la = 3.0; // Lanczos "a". 
- double kx; - double ky; - - if( kx0 == 0.0 ) - { - if( NewWidth > SrcWidth ) - { - kx = (double) ( SrcWidth - 1 ) / ( NewWidth - 1 ); - } - else - { - kx = (double) SrcWidth / NewWidth; - ox += ( kx - 1.0 ) * 0.5; - } - } - else - if( kx0 > 0.0 ) - { - kx = kx0; - - if( kx0 > 1.0 ) - { - ox += ( kx0 - 1.0 ) * 0.5; - } - } - else - { - kx = -kx0; - } - - if( ky0 == 0.0 ) - { - if( NewHeight > SrcHeight ) - { - ky = (double) ( SrcHeight - 1 ) / ( NewHeight - 1 ); - } - else - { - ky = (double) SrcHeight / NewHeight; - oy += ( ky - 1.0 ) * 0.5; - } - } - else - if( ky0 > 0.0 ) - { - ky = ky0; - - if( ky0 > 1.0 ) - { - oy += ( ky0 - 1.0 ) * 0.5; - } - } - else - { - ky = -ky0; - } - - if( rfh.update( la, kx )) - { - rsh.reset(); - rsv.reset(); - } - - CResizeFilters* rfv; // Pointer to resizing filters for vertical - // resizing, may equal to "rfh" if the same stepping is in use. - - if( ky == kx ) - { - rfv = &rfh; - } - else - { - rfv = &rfv0; - - if( rfv0.update( la, ky )) - { - rsv.reset(); - } - } - - rsh.update( kx, ox, ElCount, SrcWidth, NewWidth, rfh ); - rsv.update( ky, oy, ElCount, SrcHeight, NewHeight, *rfv ); - - const int NewWidthE = NewWidth * ElCount; - - if( SrcScanlineSize < 1 ) - { - SrcScanlineSize = SrcWidth * ElCount; - } - - // Allocate/resize temporary buffer. - - const size_t FltBufLenNew = (size_t) NewWidthE * (size_t) SrcHeight; - - if( FltBufLenNew > FltBufLen ) - { - free( FltBuf ); - FltBufLen = FltBufLenNew; - FltBuf = (float *) memalign( 32, sizeof(float) * FltBufLen ); - CHECK_NOTNULL(FltBuf); - } - - // Perform horizontal resizing. 
- - const T* ips = SrcBuf; - float* op = FltBuf; - size_t i; - - if( ElCount == 3 ) - { - for( i = 0; i < SrcHeight; i++ ) - { - copyScanline3h( ips, rsh, SrcWidth ); - resize3( op, NewWidth, rsh.pos, rfh.KernelLen ); - ips += SrcScanlineSize; - op += NewWidthE; - } - } - else - if( ElCount == 1 ) - { - for( i = 0; i < SrcHeight; i++ ) - { - copyScanline1h( ips, rsh, SrcWidth ); - resize1( op, NewWidth, rsh.pos, rfh.KernelLen ); - ips += SrcScanlineSize; - op += NewWidthE; - } - } - else - if( ElCount == 4 ) - { - for( i = 0; i < SrcHeight; i++ ) - { - copyScanline4h( ips, rsh, SrcWidth ); - resize4( op, NewWidth, rsh.pos, rfh.KernelLen ); - ips += SrcScanlineSize; - op += NewWidthE; - } - } - else - if( ElCount == 2 ) - { - for( i = 0; i < SrcHeight; i++ ) - { - copyScanline2h( ips, rsh, SrcWidth ); - resize2( op, NewWidth, rsh.pos, rfh.KernelLen ); - ips += SrcScanlineSize; - op += NewWidthE; - } - } - - // Perform vertical resizing. - - const int spvlennew = NewHeight * ElCount; - - if( spvlennew > spvlen ) - { - free( spv ); - spvlen = spvlennew; - spv = (float *) memalign( 32, sizeof(float) * spvlen ); - } - - const bool IsIOFloat = ( (T) 0.25 != 0 ); - const int Clamp = ( sizeof( T ) == 1 ? 
255 : 65535 ); - const float* ip = FltBuf; - T* opd = NewBuf; - - if( ElCount == 3 ) - { - for( i = 0; i < NewWidth; i++ ) - { - copyScanline3v( ip, rsv, SrcHeight, NewWidthE ); - resize3( spv, NewHeight, rsv.pos, rfv -> KernelLen ); - copyOutput3( spv, opd, NewHeight, NewWidthE, IsIOFloat, - Clamp ); - - ip += 3; - opd += 3; - } - } - else - if( ElCount == 1 ) - { - for( i = 0; i < NewWidth; i++ ) - { - copyScanline1v( ip, rsv, SrcHeight, NewWidthE ); - resize1( spv, NewHeight, rsv.pos, rfv -> KernelLen ); - copyOutput1( spv, opd, NewHeight, NewWidthE, IsIOFloat, - Clamp ); - - ip++; - opd++; - } - } - else - if( ElCount == 4 ) - { - for( i = 0; i < NewWidth; i++ ) - { - copyScanline4v( ip, rsv, SrcHeight, NewWidthE ); - resize4( spv, NewHeight, rsv.pos, rfv -> KernelLen ); - copyOutput4( spv, opd, NewHeight, NewWidthE, IsIOFloat, - Clamp ); - - ip += 4; - opd += 4; - } - } - else - if( ElCount == 2 ) - { - for( i = 0; i < NewWidth; i++ ) - { - copyScanline2v( ip, rsv, SrcHeight, NewWidthE ); - resize2( spv, NewHeight, rsv.pos, rfv -> KernelLen ); - copyOutput2( spv, opd, NewHeight, NewWidthE, IsIOFloat, - Clamp ); - - ip += 2; - opd += 2; - } - } - } - -protected: - float* FltBuf; ///< Intermediate resizing buffer. - ///< - size_t FltBufLen; ///< Intermediate resizing buffer length. - ///< - float* spv; ///< Scanline buffer for vertical resizing. - ///< - unsigned spvlen; ///< Length of "spv". - ///< - - /** - * Function rounds a value and applies clamping. - * - * @param v Value to round and clamp. - * @param Clamp High clamp level, low level is 0. 
- */ - static int roundclamp( const float v, const int Clamp ) -#define roundclamp(V, CLAMP) ((CLAMP)==255?MAX(0, MIN(255, lrintf(V))):(roundclamp)(V, CLAMP)) - { - if( Clamp == 255 ) { - return MAX(0, MIN(255, lrintf(v))); - } else { - if( v <= 0.0f ) - { - return( 0 ); - } - const int vr = (int) ( v + 0.5f ); - if( vr > Clamp ) - { - return( Clamp ); - } - return( vr ); - } - } - - /** - * Function performs final output of the resized scanline data to the - * destination image buffer. Variants for 1-4-channel image. - * - * @param ip Input resized scanline. - * @param op Output image buffer. - * @param l Pixel count. - * @param opinc "op" increment, should account ElCount. - * @param IsIOFloat "True" if float output and no clamping is necessary. - * @param Clamp Clamp high level, used if IsIOFloat is "false". - */ - - template< class T > - static void copyOutput1( const float* ip, T* op, int l, const int opinc, - const bool IsIOFloat, const int Clamp ) - { - if( IsIOFloat ) - { - while( l > 0 ) - { - op[ 0 ] = (T) ip[ 0 ]; - ip++; - op += opinc; - l--; - } - } - else - { - while( l > 0 ) - { - op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); - ip++; - op += opinc; - l--; - } - } - } - - template< class T > - static void copyOutput2( const float* ip, T* op, int l, const int opinc, - const bool IsIOFloat, const int Clamp ) - { - if( IsIOFloat ) - { - while( l > 0 ) - { - op[ 0 ] = (T) ip[ 0 ]; - op[ 1 ] = (T) ip[ 1 ]; - ip += 2; - op += opinc; - l--; - } - } - else - { - while( l > 0 ) - { - op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); - op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); - ip += 2; - op += opinc; - l--; - } - } - } - - template< class T > - static void copyOutput3( const float* ip, T* op, int l, const int opinc, - const bool IsIOFloat, const int Clamp ) - { - if( IsIOFloat ) - { - while( l > 0 ) - { - op[ 0 ] = (T) ip[ 0 ]; - op[ 1 ] = (T) ip[ 1 ]; - op[ 2 ] = (T) ip[ 2 ]; - ip += 3; - op += opinc; - l--; - } - } - else - { - while( l > 0 ) - { - op[ 0 ] = (T) 
roundclamp( ip[ 0 ], Clamp ); - op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); - op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp ); - ip += 3; - op += opinc; - l--; - } - } - } - - template< class T > - static void copyOutput4( const float* ip, T* op, int l, const int opinc, - const bool IsIOFloat, const int Clamp ) - { - if( IsIOFloat ) - { - while( l > 0 ) - { - op[ 0 ] = (T) ip[ 0 ]; - op[ 1 ] = (T) ip[ 1 ]; - op[ 2 ] = (T) ip[ 2 ]; - op[ 3 ] = (T) ip[ 3 ]; - ip += 4; - op += opinc; - l--; - } - } - else - { - while( l > 0 ) - { - op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); - op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); - op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp ); - op[ 3 ] = (T) roundclamp( ip[ 3 ], Clamp ); - ip += 4; - op += opinc; - l--; - } - } - } - - class CResizeScanline; - - /** - * Class implements fractional delay filter bank calculation. - */ - - class CResizeFilters - { - friend class CResizeScanline; - - public: - int KernelLen; ///< Resampling filter kernel length, taps. Available - ///< after the update() function call. - ///< - - CResizeFilters() - : FilterBuf( NULL ) - , Filters( NULL ) - , Prevla( -1.0 ) - , Prevk( -1.0 ) - , FilterBufLen( 0 ) - , FiltersLen( 0 ) - { - } - - ~CResizeFilters() - { - free( FilterBuf ); - delete[] Filters; - } - - /** - * Function updates the resizing filter bank. - * - * @param la Lanczos "a" parameter value. - * @param k Resizing step. - * @return "True" if update occured and resizing positions should be - * updated unconditionally. - */ - - bool update( const double la, const double k ) - { - if( la == Prevla && k == Prevk ) - { - return( false ); - } - - Prevla = la; - Prevk = k; - - NormFreq = ( k <= 1.0 ? 1.0 : 1.0 / k ); - Freq = LANCIR_PI * NormFreq; - - if( Freq > LANCIR_PI ) - { - Freq = LANCIR_PI; - } - - FreqA = LANCIR_PI * NormFreq / la; - Len2 = la / NormFreq; - fl2 = (int) ceil( Len2 ); - KernelLen = fl2 + fl2; - - FracCount = 607; // For 8-bit precision. 
- FracFill = 0; - - const int FilterBufLenNew = FracCount * KernelLen; - - if( FilterBufLenNew > FilterBufLen ) - { - free( FilterBuf ); - FilterBufLen = FilterBufLenNew; - FilterBuf = (float *) memalign( 32, sizeof(float) * FilterBufLen ); - CHECK_NOTNULL(FilterBuf); - } - - if( FracCount > FiltersLen ) - { - delete[] Filters; - FiltersLen = FracCount; - Filters = new float*[ FiltersLen ]; - } - - memset( Filters, 0, FracCount * sizeof( float* )); - - return( true ); - } - - /** - * Function returns filter at the specified fractional offset. This - * function can only be called before the prior update() function - * call. - * - * @param x Fractional offset, [0; 1). - */ - - float* getFilter( const double x ) - { - const int Frac = (int) floor( x * FracCount ); - - if( Filters[ Frac ] == NULL ) - { - Filters[ Frac ] = FilterBuf + FracFill * KernelLen; - FracFill++; - makeFilter( 1.0 - (double) Frac / FracCount, Filters[ Frac ]); - normalizeFilter( Filters[ Frac ]); - } - - return( Filters[ Frac ]); - } - - protected: - double NormFreq; ///< Normalized frequency of the filter. - ///< - double Freq; ///< Circular frequency of the filter. - ///< - double FreqA; ///< Circular frequency of the window function. - ///< - double Len2; ///< Half resampling filter length, unrounded. - ///< - int fl2; ///< Half resampling length, integer. - ///< - int FracCount; ///< The number of fractional positions for which - ///< filters are created. - ///< - int FracFill; ///< The number of fractional positions filled in the - ///< filter buffer. - ///< - float* FilterBuf; ///< Buffer that holds all filters. - ///< - float** Filters; ///< Fractional delay filters for all positions. - ///< Filter pointers equal NULL if filter was not yet created. - ///< - double Prevla; ///< Previous "la". - ///< - double Prevk; ///< Previous "k". - ///< - int FilterBufLen; ///< Allocated length of FilterBuf in elements. - ///< - int FiltersLen; ///< Allocated length of Filters in elements. 
- ///< - - /** - * @brief Sine signal generator class. - * - * Class implements sine signal generator without biasing, with - * constructor-based initalization only. This generator uses - * oscillator instead of "sin" function. - */ - - class CSinGen - { - public: - /** - * Constructor initializes *this sine signal generator. - * - * @param si Sine function increment, in radians. - * @param ph Starting phase, in radians. Add 0.5 * LANCIR_PI for - * cosine function. - * @param g Gain value. - */ - - CSinGen( const double si, const double ph, const double g = 1.0 ) - : svalue1( sin( ph ) * g ) - , svalue2( sin( ph - si ) * g ) - , sincr( 2.0 * cos( si )) - { - } - - /** - * @return The next value of the sine function, without biasing. - */ - - double generate() - { - const double res = svalue1; - - svalue1 = sincr * res - svalue2; - svalue2 = res; - - return( res ); - } - - private: - double svalue1; ///< Current sine value. - ///< - double svalue2; ///< Previous sine value. - ///< - double sincr; ///< Sine value increment. - ///< - }; - - /** - * Function creates filter for the specified fractional delay. The - * update() function should be called prior to calling this function. - * - * @param FracDelay Fractional delay, 0 to 1, inclusive. - * @param[out] Output filter buffer. - * @tparam T Output buffer type. - */ - - template< class T > - void makeFilter( const double FracDelay, T* op ) const - { - CSinGen f( Freq, Freq * ( FracDelay - fl2 )); - CSinGen fw( FreqA, FreqA * ( FracDelay - fl2 ), Len2 ); - - int t = -fl2; - - if( t + FracDelay < -Len2 ) - { - f.generate(); - fw.generate(); - *op = (T) 0.0; - op++; - t++; - } - - int mt = ( FracDelay >= 1.0 - 1e-13 && FracDelay <= 1.0 + 1e-13 ? 
- -1 : 0 ); - - while( t < mt ) - { - double ut = ( t + FracDelay ) * LANCIR_PI; - *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); - op++; - t++; - } - - double ut = t + FracDelay; - - if( fabs( ut ) <= 1e-13 ) - { - *op = (T) NormFreq; - f.generate(); - fw.generate(); - } - else - { - ut *= LANCIR_PI; - *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); - } - - mt = fl2 - 2; - - while( t < mt ) - { - op++; - t++; - ut = ( t + FracDelay ) * LANCIR_PI; - *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); - } - - op++; - t++; - ut = t + FracDelay; - - if( ut > Len2 ) - { - *op = (T) 0.0; - } - else - { - ut *= LANCIR_PI; - *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); - } - } - - /** - * Function normalizes the specified filter so that it has unity gain - * at DC. - * - * @param p Filter buffer pointer. - * @tparam T Filter buffer type. - */ - - template< class T > - void normalizeFilter( T* const p ) const - { - double s = 0.0; - size_t i; - - for( i = 0; i < KernelLen; i++ ) - { - s += p[ i ]; - } - - s = 1.0 / s; - - for( i = 0; i < KernelLen; i++ ) - { - p[ i ] = (T) ( p[ i ] * s ); - } - } - }; - - /** - * Structure defines source scanline positioning and filters for each - * destination pixel. - */ - - struct CResizePos - { - const float* ip; ///< Source image pixel pointer. - ///< - float* flt; ///< Fractional delay filter. - ///< - }; - - /** - * Class contains resizing positioning and a temporary scanline buffer, - * prepares source scanline positions for resize filtering. - */ - - class CResizeScanline - { - public: - int padl; ///< Left-padding (in pixels) required for source scanline. - ///< Available after the update() function call. - ///< - int padr; ///< Right-padding (in pixels) required for source scanline. - ///< Available after the update() function call. - ///< - float* sp; ///< Source scanline buffer, with "padl" and "padr" - ///< padding. 
- ///< - CResizePos* pos; ///< Source scanline pointers (point to "sp") - ///< and filters for each destination pixel position. Available - ///< after the update() function call. - ///< - - CResizeScanline() - : sp( NULL ) - , pos( NULL ) - , PrevSrcLen( -1 ) - , PrevDstLen( -1 ) - , Prevk( 0.0 ) - , Prevo( 0.0 ) - , PrevElCount( 0 ) - , splen( 0 ) - , poslen( 0 ) - { - } - - ~CResizeScanline() - { - free( sp ); - delete[] pos; - } - - /** - * Function "resets" *this object so that the next update() call fully - * updates the position buffer. Reset is necessary if the filter - * object was updated. - */ - - void reset() - { - PrevSrcLen = -1; - } - - /** - * Function updates resizing positions, updates "padl", "padr" and - * "pos" buffer. - * - * @param k Resizing step. - * @param o0 Initial source image offset. - * @param SrcLen Source image scanline length, used to create a - * scanline buffer without length pre-calculation. - * @param DstLen Destination image scanline length. - * @param rf Resizing filters object. 
- */ - - void update( const double k, const double o0, const int ElCount, - const int SrcLen, const size_t DstLen, CResizeFilters& rf ) - { - if( SrcLen == PrevSrcLen && DstLen == PrevDstLen && - k == Prevk && o0 == Prevo && ElCount == PrevElCount ) - { - return; - } - - PrevSrcLen = SrcLen; - PrevDstLen = DstLen; - Prevk = k; - Prevo = o0; - PrevElCount = ElCount; - - const int fl2m1 = rf.fl2 - 1; - padl = fl2m1 - (int) floor( o0 ); - - if( padl < 0 ) - { - padl = 0; - } - - padr = (int) floor( o0 + k * ( DstLen - 1 )) + rf.fl2 + 1 - - SrcLen; - - if( padr < 0 ) - { - padr = 0; - } - - const int splennew = ( padl + SrcLen + padr ) * ElCount; - - if( splennew > splen ) - { - free( sp ); - splen = splennew; - sp = (float *) memalign( 32, sizeof(float) * splen ); - CHECK_NOTNULL(sp); - } - - if( DstLen > poslen ) - { - delete[] pos; - poslen = DstLen; - pos = new CResizePos[ poslen ]; - } - - const float* const spo = sp + ( padl - fl2m1 ) * ElCount; - size_t i; - - for( i = 0; i < DstLen; i++ ) - { - const double o = o0 + k * i; - const int ix = (int) floor( o ); - pos[ i ].ip = spo + ix * ElCount; - pos[ i ].flt = rf.getFilter( o - ix ); - } - } - - protected: - int PrevSrcLen; ///< Previous SrcLen. - ///< - int PrevDstLen; ///< Previous DstLen. - ///< - double Prevk; ///< Previous "k". - ///< - double Prevo; ///< Previous "o". - ///< - int PrevElCount; ///< Previous pixel element count. - ///< - int splen; ///< Allocated "sp" buffer length. - ///< - int poslen; ///< Allocated "pos" buffer length. - ///< - }; - - CResizeFilters rfh; ///< Resizing filters for horizontal resizing. - ///< - CResizeFilters rfv0; ///< Resizing filters for vertical resizing (may not - ///< be in use). - ///< - CResizeScanline rsh; ///< Horizontal resize scanline. - ///< - CResizeScanline rsv; ///< Vertical resize scanline. - ///< - - /** - * Function copies scanline from the source buffer in its native format - * to internal scanline buffer, in preparation for horizontal resizing. 
- * Variants for 1-4-channel images. - * - * @param ip Source scanline buffer. - * @param rs Scanline resizing positions object. - * @param l Source scanline length, in pixels. - * @param ipinc "ip" increment per pixel. - */ - - template< class T > - static void copyScanline1h( const T* ip, CResizeScanline& rs, const int l ) - { - float* op = rs.sp; - int i; - - DCHECK_ALIGNED(32, op); - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op++; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - ip++; - op++; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op++; - } - } - - template< class T > - static void copyScanline2h( const T* ip, CResizeScanline& rs, const int l ) - { - float* op = rs.sp; - int i; - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op += 2; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - ip += 2; - op += 2; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op += 2; - } - } - - template< class T > - static void copyScanline3h( const T* ip, CResizeScanline& rs, const int l ) - { - float* op = rs.sp; - int i; - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op += 3; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - ip += 3; - op += 3; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op += 3; - } - } - - template< class T > - static void copyScanline4h( const T* ip, CResizeScanline& rs, const size_t l ) - { - float* op = rs.sp; - size_t i; - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - op += 4; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - ip += 4; - op += 4; - } - - for( i = 0; i 
<= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - op += 4; - } - } - - /** - * Function copies scanline from the source buffer in its native format - * to internal scanline buffer, in preparation for vertical resizing. - * Variants for 1-4-channel images. - * - * @param ip Source scanline buffer. - * @param rs Scanline resizing positions object. - * @param l Source scanline length, in pixels. - * @param ipinc "ip" increment per pixel. - */ - - template< class T > - static void copyScanline1v( const T* ip, CResizeScanline& rs, const int l, - const int ipinc ) - { - float* op = rs.sp; - int i; - - DCHECK_ALIGNED(32, op); - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op++; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - ip += ipinc; - op++; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op++; - } - } - - template< class T > - static void copyScanline2v( const T* ip, CResizeScanline& rs, const int l, - const int ipinc ) - { - float* op = rs.sp; - int i; - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op += 2; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - ip += ipinc; - op += 2; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op += 2; - } - } - - template< class T > - static void copyScanline3v( const T* ip, CResizeScanline& rs, const int l, - const int ipinc ) - { - float* op = rs.sp; - int i; - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op += 3; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - ip += ipinc; - op += 3; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op += 3; - } - } - - template< class T > - static void copyScanline4v( const T* ip, CResizeScanline& rs, const 
size_t l, - const int ipinc ) - { - float* op = rs.sp; - size_t i; - - for( i = 0; i < rs.padl; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - op += 4; - } - - for( i = 0; i < l - 1; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - ip += ipinc; - op += 4; - } - - for( i = 0; i <= rs.padr; i++ ) - { - op[ 0 ] = ip[ 0 ]; - op[ 1 ] = ip[ 1 ]; - op[ 2 ] = ip[ 2 ]; - op[ 3 ] = ip[ 3 ]; - op += 4; - } - } - - #define LANCIR_LF_PRE \ - CResizePos* const rpe = rp + DstLen; \ - while( rp < rpe ) \ - { \ - const float* ip = rp -> ip; \ - const float* const flt = rp -> flt; - - #define LANCIR_LF_POST \ - rp++; \ - } - - static void resize1_kl6( float* op, int DstLen, CResizePos* rp ) - { - LANCIR_LF_PRE - op[ 0 ] = - flt[ 0 ] * ip[ 0 ] + - flt[ 1 ] * ip[ 1 ] + - flt[ 2 ] * ip[ 2 ] + - flt[ 3 ] * ip[ 3 ] + - flt[ 4 ] * ip[ 4 ] + - flt[ 5 ] * ip[ 5 ]; - op++; - LANCIR_LF_POST - } - - static void resize1_kln( float* op, int DstLen, CResizePos* rp, const int kl ) - { - LANCIR_LF_PRE - float sum = 0.0; - int i; - for( i = 0; i < kl; i++ ) - { - sum += flt[ i ] * ip[ i ]; - } - op[ 0 ] = sum; - op++; - LANCIR_LF_POST - } - - /** - * Function performs internal scanline resizing. Variants for 1-4-channel - * images. - * - * @param op Destination buffer. - * @param DstLen Destination length, in pixels. - * @param rp Resizing positions and filters. - * @param kl Filter kernel length, in taps. 
- */ - static void resize1( float* op, int DstLen, CResizePos* rp, const int kl ) - { - if( kl == 6 ) - { - resize1_kl6( op, DstLen, rp ); - } - else - { - resize1_kln( op, DstLen, rp, kl ); - } - } - - static void resize2( float* op, int DstLen, CResizePos* rp, const int kl ) - { - if( kl == 6 ) - { - LANCIR_LF_PRE - op[ 0 ] = - flt[ 0 ] * ip[ 0 ] + - flt[ 1 ] * ip[ 2 ] + - flt[ 2 ] * ip[ 4 ] + - flt[ 3 ] * ip[ 6 ] + - flt[ 4 ] * ip[ 8 ] + - flt[ 5 ] * ip[ 10 ]; - - op[ 1 ] = - flt[ 0 ] * ip[ 1 ] + - flt[ 1 ] * ip[ 3 ] + - flt[ 2 ] * ip[ 5 ] + - flt[ 3 ] * ip[ 7 ] + - flt[ 4 ] * ip[ 9 ] + - flt[ 5 ] * ip[ 11 ]; - - op += 2; - LANCIR_LF_POST - } - else - { - LANCIR_LF_PRE - float sum[ 2 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - int i; - - for( i = 0; i < kl; i++ ) - { - const float xx = flt[ i ]; - sum[ 0 ] += xx * ip[ 0 ]; - sum[ 1 ] += xx * ip[ 1 ]; - ip += 2; - } - - op[ 0 ] = sum[ 0 ]; - op[ 1 ] = sum[ 1 ]; - op += 2; - LANCIR_LF_POST - } - } - - static void resize3( float* op, int DstLen, CResizePos* rp, const int kl ) - { - if( kl == 6 ) - { - LANCIR_LF_PRE - op[ 0 ] = - flt[ 0 ] * ip[ 0 ] + - flt[ 1 ] * ip[ 3 ] + - flt[ 2 ] * ip[ 6 ] + - flt[ 3 ] * ip[ 9 ] + - flt[ 4 ] * ip[ 12 ] + - flt[ 5 ] * ip[ 15 ]; - - op[ 1 ] = - flt[ 0 ] * ip[ 1 ] + - flt[ 1 ] * ip[ 4 ] + - flt[ 2 ] * ip[ 7 ] + - flt[ 3 ] * ip[ 10 ] + - flt[ 4 ] * ip[ 13 ] + - flt[ 5 ] * ip[ 16 ]; - - op[ 2 ] = - flt[ 0 ] * ip[ 2 ] + - flt[ 1 ] * ip[ 5 ] + - flt[ 2 ] * ip[ 8 ] + - flt[ 3 ] * ip[ 11 ] + - flt[ 4 ] * ip[ 14 ] + - flt[ 5 ] * ip[ 17 ]; - - op += 3; - LANCIR_LF_POST - } - else - { - LANCIR_LF_PRE - float sum[ 3 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - sum[ 2 ] = 0.0; - int i; - - for( i = 0; i < kl; i++ ) - { - const float xx = flt[ i ]; - sum[ 0 ] += xx * ip[ 0 ]; - sum[ 1 ] += xx * ip[ 1 ]; - sum[ 2 ] += xx * ip[ 2 ]; - ip += 3; - } - - op[ 0 ] = sum[ 0 ]; - op[ 1 ] = sum[ 1 ]; - op[ 2 ] = sum[ 2 ]; - op += 3; - LANCIR_LF_POST - } - } - - static optimizespeed void resize4( float* op, int 
DstLen, CResizePos* rp, const size_t kl ) - { - LANCIR_LF_PRE - float sum[ 4 ]; - sum[ 0 ] = 0.0; - sum[ 1 ] = 0.0; - sum[ 2 ] = 0.0; - sum[ 3 ] = 0.0; - size_t i; - - for( i = 0; i < kl; i++ ) - { - const float xx = flt[ i ]; - sum[ 0 ] += xx * ip[ 0 ]; - sum[ 1 ] += xx * ip[ 1 ]; - sum[ 2 ] += xx * ip[ 2 ]; - sum[ 3 ] += xx * ip[ 3 ]; - ip += 4; - } - - op[ 0 ] = sum[ 0 ]; - op[ 1 ] = sum[ 1 ]; - op[ 2 ] = sum[ 2 ]; - op[ 3 ] = sum[ 3 ]; - op += 4; - LANCIR_LF_POST - } - - #undef LANCIR_LF_PRE - #undef LANCIR_LF_POST - - static relegated nooptimize noinline void handleEmptySrcCornerCase( - void * const NewBuf, const size_t Size ) - { - memset( NewBuf, 0, Size ); - } -}; - -#undef LANCIR_PI - -} // namespace avir - -#endif // AVIR_CLANCIR_INCLUDED diff --git a/third_party/avir/lanczos.cc b/third_party/avir/lanczos.cc deleted file mode 100644 index ae4c8b49..00000000 --- a/third_party/avir/lanczos.cc +++ /dev/null @@ -1,40 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/limits.h" -#include "libc/log/check.h" -#include "libc/log/log.h" -#include "third_party/avir/lanczos.h" -namespace { -#include "third_party/avir/lancir.h" -} // namespace - -/** - * Does Lanczos interpolation. - * @note computers w/o AVX2+FMA need to call BilinearScale() - */ -void lanczos(unsigned dyn, unsigned dxn, void *restrict dst, unsigned syn, - unsigned sxn, const void *restrict src, unsigned sw) { - avir::CLancIR lanczos; - DCHECK_ALIGNED(64, dst); - DCHECK_ALIGNED(64, src); - LOGF("%10s%5zux×%-5zu→%5zu×%-5zu", "lanczos", sxn, syn, dxn, dyn); - lanczos.resizeImage((const float *)src, sxn, syn, sw, (float *)dst, dxn, dyn, - 4); -} diff --git a/third_party/avir/lanczos.h b/third_party/avir/lanczos.h deleted file mode 100644 index 18395a7e..00000000 --- a/third_party/avir/lanczos.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -void lanczos(unsigned, unsigned, void *, unsigned, unsigned, const void *, - unsigned); -void lanczos3(unsigned, unsigned, void *, unsigned, unsigned, const void *, - unsigned); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS_H_ */ diff --git a/third_party/avir/lanczos1.cc b/third_party/avir/lanczos1.cc deleted file mode 100644 index af2c31d2..00000000 --- a/third_party/avir/lanczos1.cc +++ /dev/null @@ -1,77 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ 
-╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/xmmintrin.internal.h" -#include "libc/limits.h" -#include "libc/log/log.h" -#include "libc/runtime/runtime.h" -#include "third_party/avir/lanczos1.h" -namespace { -#include "third_party/avir/lanczos1.hpp" -} // namespace - -void lanczos1init(struct lanczos1 *resizer) { - lanczos1free(resizer); - resizer->p = new Lanczos1Impl; -} - -void lanczos1free(struct lanczos1 *resizer) { - Lanczos1Impl *impl; - if (!resizer->p) return; - impl = (Lanczos1Impl *)resizer->p; - delete impl; - resizer->p = nullptr; -} - -/** - * Resizes image plane w/ Lanczos interpolation, e.g. 
- * - * struct lanczos1 scaler = {0}; - * lanczos1init(&scaler); - * lanczos1(&scaler, ...); - * lanczos1free(&scaler); - * - * @param dyn is destination height - * @param dxn is destination width - * @param dst is destination unsigned char array - * @param dstsize is number of bytes in dst - * @param syn is source height - * @param sxn is source width - * @param ssw is number of unsigned chars per scanline in src - * @param src is source unsigned char array - * @param srcsize is number of bytes in src - */ -void lanczos1(struct lanczos1 *resizer, size_t dyn, size_t dxn, void *dst, - size_t dstsize, size_t syn, size_t sxn, size_t ssw, - const void *src, size_t srcsize) { - Lanczos1Impl *impl; - unsigned int roundhouse; - LOGF("%10s%5zux×%-5zu→%5zu×%-5zu", "lanczos1", sxn, syn, dxn, dyn); - CHECK_LE(dstsize, INT_MAX); - CHECK_LE(srcsize, INT_MAX); - CHECK_LE(sizeof(unsigned char) * 1 * dyn * dxn, dstsize); - CHECK_LE(sizeof(unsigned char) * 1 * syn * sxn, srcsize); - CHECK_LE(sizeof(unsigned char) * syn * ssw, srcsize); - roundhouse = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - impl = (Lanczos1Impl *)resizer->p; - impl->lanczos.resizeImage((const unsigned char *)src, sxn, syn, ssw, - (unsigned char *)dst, dxn, dyn, 1); - _MM_SET_ROUNDING_MODE(roundhouse); -} diff --git a/third_party/avir/lanczos1.h b/third_party/avir/lanczos1.h deleted file mode 100644 index 490841ff..00000000 --- a/third_party/avir/lanczos1.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -struct lanczos1 { - void *p; -}; - -void lanczos1init(struct lanczos1 *self); -void lanczos1free(struct lanczos1 *self); -void lanczos1(struct lanczos1 *self, size_t dyn, size_t dxn, void *dst, - size_t dstsize, size_t syn, size_t sxn, size_t ssw, - const void *src, size_t srcsize) paramsnonnull(); - -COSMOPOLITAN_C_END_ -#endif /* 
!(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_H_ */ diff --git a/third_party/avir/lanczos1.hpp b/third_party/avir/lanczos1.hpp deleted file mode 100644 index 07e9bd6a..00000000 --- a/third_party/avir/lanczos1.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_HPP_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_HPP_ -#include "third_party/avir/lancir.h" - -struct Lanczos1Impl { - Lanczos1Impl() : lanczos{} { - } - avir::CLancIR lanczos; -}; - -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_HPP_ */ diff --git a/third_party/avir/lanczos1b.cc b/third_party/avir/lanczos1b.cc deleted file mode 100644 index 9ec2bace..00000000 --- a/third_party/avir/lanczos1b.cc +++ /dev/null @@ -1,31 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/bits.h" -#include "third_party/avir/lanczos1b.h" -namespace { -#include "third_party/avir/lancir.h" -} // namespace - -void lanczos1b(size_t dyn, size_t dxn, unsigned char *restrict dst, size_t syn, - size_t sxn, const unsigned char *restrict src) { - avir::CLancIR lanczos; - LOGF("%10s%5zux×%-5zu→%5zu×%-5zu", "lanczos1b", sxn, syn, dxn, dyn); - lanczos.resizeImage(src, sxn, syn, roundup2pow(sxn) * 4, dst, dxn, dyn, 4); -} diff --git a/third_party/avir/lanczos1b.h b/third_party/avir/lanczos1b.h deleted file mode 100644 index 2e21c734..00000000 --- a/third_party/avir/lanczos1b.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1B_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1B_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -void lanczos1b(size_t dyn, size_t dxn, unsigned char *restrict dst, size_t syn, - size_t sxn, const unsigned char *restrict src); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1B_H_ */ diff --git a/third_party/avir/lanczos1f.cc b/third_party/avir/lanczos1f.cc deleted file mode 100644 index 9cead815..00000000 --- a/third_party/avir/lanczos1f.cc +++ /dev/null @@ -1,63 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the 
Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/xmmintrin.internal.h" -#include "libc/runtime/runtime.h" -#include "third_party/avir/lanczos1f.h" -namespace { -#include "third_party/avir/lanczos1f.hpp" -} // namespace - -void lanczos1finit(struct lanczos1f *resizer) { - lanczos1ffree(resizer); - resizer->p = new Lanczos1fImpl; -} - -void lanczos1ffree(struct lanczos1f *resizer) { - Lanczos1fImpl *impl; - if (!resizer->p) return; - impl = (Lanczos1fImpl *)resizer->p; - delete impl; - resizer->p = nullptr; -} - -/** - * Resizes image plane w/ Lanczos interpolation, e.g. 
- * - * struct lanczos1f scaler = {0}; - * lanczos1finit(&scaler); - * lanczos1f(&scaler, ...); - * lanczos1ffree(&scaler); - * - * @param dyn is destination height - * @param dxn is destination width - * @param dst is destination unsigned char array - * @param syn is source height - * @param sxn is source width - * @param ssw is number of unsigned chars per scanline in src - * @param src is source unsigned char array - */ -void lanczos1f(struct lanczos1f *resizer, size_t dyn, size_t dxn, void *dst, - size_t syn, size_t sxn, size_t ssw, const void *src, double ky0, - double kx0, double oy, double ox) { - Lanczos1fImpl *impl; - impl = (Lanczos1fImpl *)resizer->p; - impl->lanczos.resizeImage((const float *)src, sxn, syn, ssw, (float *)dst, - dxn, dyn, 1, kx0, ky0, ox, oy); -} diff --git a/third_party/avir/lanczos1f.h b/third_party/avir/lanczos1f.h deleted file mode 100644 index cba61c32..00000000 --- a/third_party/avir/lanczos1f.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -struct lanczos1f { - void *p; -}; - -void lanczos1finit(struct lanczos1f *); -void lanczos1ffree(struct lanczos1f *); -void lanczos1f(struct lanczos1f *, size_t, size_t, void *, size_t, size_t, - size_t, const void *, double, double, double, double) - paramsnonnull(); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_H_ */ diff --git a/third_party/avir/lanczos1f.hpp b/third_party/avir/lanczos1f.hpp deleted file mode 100644 index cc821058..00000000 --- a/third_party/avir/lanczos1f.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_HPP_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_HPP_ -#include "third_party/avir/lancir.h" - -struct Lanczos1fImpl { - Lanczos1fImpl() : lanczos{} { - } - avir::CLancIR lanczos; -}; - -#endif /* 
COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_HPP_ */ diff --git a/third_party/avir/lanczos3.cc b/third_party/avir/lanczos3.cc deleted file mode 100644 index d64e8f7f..00000000 --- a/third_party/avir/lanczos3.cc +++ /dev/null @@ -1,30 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/avir/lanczos.h" -namespace { -#include "third_party/avir/lancir.h" -} - -void lanczos3(unsigned dyn, unsigned dxn, void *dst, unsigned syn, unsigned sxn, - const void *src, unsigned sw) { - avir::CLancIR lanczos; - lanczos.resizeImage((const float *)src, sxn, syn, sw, (float *)dst, dxn, dyn, - 3, -1, -2); -} diff --git a/third_party/avir/notice.h b/third_party/avir/notice.h deleted file mode 100644 index 2ca1f2c4..00000000 --- a/third_party/avir/notice.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_NOTICE_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_NOTICE_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) - -asm(".ident\t\"\\n\\n\ -AVIR (MIT License)\\n\ -Copyright 
2015-2019 Aleksey Vaneev\""); -asm(".include \"libc/disclaimer.inc\""); - -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_NOTICE_H_ */ diff --git a/third_party/avir/resize.cc b/third_party/avir/resize.cc deleted file mode 100644 index d7d7eba9..00000000 --- a/third_party/avir/resize.cc +++ /dev/null @@ -1,48 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/avir/resize.h" -namespace { -#include "third_party/avir/avir_float4_sse.h" -} // namespace - -struct ResizerImpl { - ResizerImpl() : resizer{8, 8, avir::CImageResizerParamsULR()} {} - avir::CImageResizer resizer; -}; - -void NewResizer(Resizer *resizer, int aResBitDepth, int aSrcBitDepth) { - FreeResizer(resizer); - resizer->p = new ResizerImpl(); -} - -void FreeResizer(Resizer *resizer) { - if (!resizer->p) return; - delete (ResizerImpl *)resizer->p; - resizer->p = nullptr; -} - -void ResizeImage(Resizer *resizer, float *Dest, int DestHeight, int DestWidth, - const float *Src, int SrcHeight, int SrcWidth) { - ResizerImpl *impl = (ResizerImpl *)resizer->p; - int SrcScanLineSize = 0; - double ResizingStep = 0; - impl->resizer.resizeImage(Src, SrcWidth, SrcHeight, SrcScanLineSize, Dest, - DestWidth, DestHeight, 4, ResizingStep); -} diff --git a/third_party/avir/resize.h b/third_party/avir/resize.h deleted file mode 100644 index 6d4871bf..00000000 --- a/third_party/avir/resize.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_RESIZE_H_ -#define COSMOPOLITAN_THIRD_PARTY_AVIR_RESIZE_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -struct Resizer { - void *p; -}; - -void FreeResizer(struct Resizer *) paramsnonnull(); -void NewResizer(struct Resizer *, int, int) paramsnonnull(); -void ResizeImage(struct Resizer *, float *, int, int, const float *, int, int) - paramsnonnull(); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_RESIZE_H_ */ diff --git a/third_party/chibicc/as.c b/third_party/chibicc/as.c index 4d06bb62..b4c58e5d 100644 --- 
a/third_party/chibicc/as.c +++ b/third_party/chibicc/as.c @@ -2707,15 +2707,6 @@ static noinline void OpFpu1(struct As *a, int op, int reg) { OpFpu1Impl(a, op, reg); } -static void OnFucomi(struct As *a, struct Slice s) { - int reg, rm; - rm = !IsPunct(a, a->i, ';') ? GetRegisterRm(a) : 1; - reg = !IsPunct(a, a->i, ';') ? GetRegisterReg(a) : 0; - if (reg & 7) Fail(a, "bad register"); - EmitByte(a, 0xDB); - EmitByte(a, 0350 | rm & 7); -} - static void OnFxch(struct As *a, struct Slice s) { int rm; rm = !IsPunct(a, a->i, ';') ? GetRegisterRm(a) : 1; @@ -2731,6 +2722,18 @@ static void OnBswap(struct As *a, struct Slice s) { EmitByte(a, 0310 | srm & 7); } +static noinline void OpFcomImpl(struct As *a, int op) { + int reg, rm; + rm = !IsPunct(a, a->i, ';') ? GetRegisterRm(a) : 1; + reg = !IsPunct(a, a->i, ';') ? GetRegisterReg(a) : 0; + if (reg & 7) Fail(a, "bad register"); + EmitVarword(a, op | rm & 7); +} + +static noinline void OpFcom(struct As *a, int op) { + OpFcomImpl(a, op); +} + // clang-format off static void OnAdc(struct As *a, struct Slice s) { OpAlu(a, s, 2); } static void OnAdd(struct As *a, struct Slice s) { OpAlu(a, s, 0); } @@ -2803,13 +2806,22 @@ static void OnDivps(struct As *a, struct Slice s) { OpSse(a, 0x0F5E); } static void OnDivsd(struct As *a, struct Slice s) { OpSse(a, 0xF20F5E); } static void OnDivss(struct As *a, struct Slice s) { OpSse(a, 0xF30F5E); } static void OnDppd(struct As *a, struct Slice s) { OpSse(a, 0x660F3A41); } -static void OnFabs(struct As *a, struct Slice s) { EmitVarword(a, 0xd9e1); } +static void OnFabs(struct As *a, struct Slice s) { EmitVarword(a, 0xD9E1); } static void OnFaddl(struct As *a, struct Slice s) { OpFpu1(a, 0xDC, 0); } -static void OnFaddp(struct As *a, struct Slice s) { EmitVarword(a, 0xdec1); } +static void OnFaddp(struct As *a, struct Slice s) { EmitVarword(a, 0xDEC1); } static void OnFadds(struct As *a, struct Slice s) { OpFpu1(a, 0xD8, 0); } -static void OnFchs(struct As *a, struct Slice s) { 
EmitVarword(a, 0xd9e0); } -static void OnFcomip(struct As *a, struct Slice s) { EmitVarword(a, 0xdff1); } -static void OnFdivrp(struct As *a, struct Slice s) { EmitVarword(a, 0xdef9); } +static void OnFchs(struct As *a, struct Slice s) { EmitVarword(a, 0xD9E0); } +static void OnFcmovb(struct As *a, struct Slice s) { OpFcom(a, 0xDAC0); } +static void OnFcmovbe(struct As *a, struct Slice s) { OpFcom(a, 0xDAD0); } +static void OnFcmove(struct As *a, struct Slice s) { OpFcom(a, 0xDAC8); } +static void OnFcmovnb(struct As *a, struct Slice s) { OpFcom(a, 0xDBC0); } +static void OnFcmovnbe(struct As *a, struct Slice s) { OpFcom(a, 0xDBD0); } +static void OnFcmovne(struct As *a, struct Slice s) { OpFcom(a, 0xDBC8); } +static void OnFcmovnu(struct As *a, struct Slice s) { OpFcom(a, 0xDBD8); } +static void OnFcmovu(struct As *a, struct Slice s) { OpFcom(a, 0xDAD8); } +static void OnFcomi(struct As *a, struct Slice s) { OpFcom(a, 0xDBF0); } +static void OnFcomip(struct As *a, struct Slice s) { OpFcom(a, 0xDFF0); } +static void OnFdivrp(struct As *a, struct Slice s) { EmitVarword(a, 0xDEF9); } static void OnFildl(struct As *a, struct Slice s) { OpFpu1(a, 0xDB, 0); } static void OnFildll(struct As *a, struct Slice s) { OpFpu1(a, 0xDF, 5); } static void OnFildq(struct As *a, struct Slice s) { OpFpu1(a, 0xDF, 5); } @@ -2837,9 +2849,11 @@ static void OnFstps(struct As *a, struct Slice s) { OpFpu1(a, 0xD9, 3); } static void OnFstpt(struct As *a, struct Slice s) { OpFpu1(a, 0xDB, 7); } static void OnFsubrp(struct As *a, struct Slice s) { EmitVarword(a, 0xDEE9); } static void OnFtst(struct As *a, struct Slice s) { EmitVarword(a, 0xD9E4); } -static void OnFucomip(struct As *a, struct Slice s) { EmitVarword(a, 0xDFE9); } +static void OnFucomi(struct As *a, struct Slice s) { OpFcom(a, 0xDBE8); } +static void OnFucomip(struct As *a, struct Slice s) { OpFcom(a, 0xDFE8); } static void OnFwait(struct As *a, struct Slice s) { EmitByte(a, 0x9B); } -static void OnFxam(struct As *a, struct 
Slice s) { EmitVarword(a, 0xd9e5); } +static void OnFxam(struct As *a, struct Slice s) { EmitVarword(a, 0xD9E5); } +static void OnFxtract(struct As *a, struct Slice s) { EmitVarword(a, 0xD9F4); } static void OnHaddpd(struct As *a, struct Slice s) { OpSse(a, 0x660F7C); } static void OnHaddps(struct As *a, struct Slice s) { OpSse(a, 0xF20F7C); } static void OnHlt(struct As *a, struct Slice s) { EmitByte(a, 0xF4); } @@ -3211,6 +3225,15 @@ static const struct Directive8 { {"faddp", OnFaddp}, // {"fadds", OnFadds}, // {"fchs", OnFchs}, // + {"fcmovb", OnFcmovb}, // + {"fcmovbe", OnFcmovbe}, // + {"fcmove", OnFcmove}, // + {"fcmovnb", OnFcmovnb}, // + {"fcmovnbe", OnFcmovnbe}, // + {"fcmovne", OnFcmovne}, // + {"fcmovnu", OnFcmovnu}, // + {"fcmovu", OnFcmovu}, // + {"fcomi", OnFcomi}, // {"fcomip", OnFcomip}, // {"fdivrp", OnFdivrp}, // {"fildl", OnFildl}, // @@ -3246,6 +3269,7 @@ static const struct Directive8 { {"fwait", OnFwait}, // {"fxam", OnFxam}, // {"fxch", OnFxch}, // + {"fxtract", OnFxtract}, // {"haddpd", OnHaddpd}, // {"haddps", OnHaddps}, // {"hlt", OnHlt}, // diff --git a/third_party/chibicc/chibicc.c b/third_party/chibicc/chibicc.c index fd6ce4dd..fa113f1d 100644 --- a/third_party/chibicc/chibicc.c +++ b/third_party/chibicc/chibicc.c @@ -1,3 +1,5 @@ +#include "libc/calls/struct/siginfo.h" +#include "libc/calls/ucontext.h" #include "third_party/chibicc/chibicc.h" asm(".ident\t\"\\n\\n\ @@ -381,10 +383,9 @@ static bool run_subprocess(char **argv) { for (int i = 1; argv[i]; i++) fprintf(stderr, " %s", argv[i]); fprintf(stderr, "\n"); } - if (fork() == 0) { + if (!vfork()) { // Child process. Run a new command. execvp(argv[0], argv); - fprintf(stderr, "exec failed: %s: %s\n", argv[0], strerror(errno)); _exit(1); } // Wait for the child process to finish. 
@@ -503,7 +504,7 @@ static Token *append_tokens(Token *tok1, Token *tok2) { return tok1; } -static FileType get_file_type(char *filename) { +static FileType get_file_type(const char *filename) { if (opt_x != FILE_NONE) return opt_x; if (endswith(filename, ".a")) return FILE_AR; if (endswith(filename, ".o")) return FILE_OBJ; @@ -514,7 +515,13 @@ static FileType get_file_type(char *filename) { } static void cc1(void) { + FileType ft; Token *tok = NULL; + ft = get_file_type(base_file); + if (opt_J && (ft == FILE_ASM || ft == FILE_ASM_CPP)) { + output_javadown_asm(output_file, base_file); + return; + } // Process -include option for (int i = 0; i < opt_include.len; i++) { char *incl = opt_include.data[i]; @@ -538,7 +545,7 @@ static void cc1(void) { if (opt_M) return; } // If -E is given, print out preprocessed C code as a result. - if (opt_E || get_file_type(base_file) == FILE_ASM_CPP) { + if (opt_E || ft == FILE_ASM_CPP) { print_tokens(tok); return; } @@ -605,8 +612,13 @@ static void run_linker(StringArray *inputs, char *output) { handle_exit(run_subprocess(arr.data)); } +static void OnCtrlC(int sig, siginfo_t *si, ucontext_t *ctx) { + exit(1); +} + int chibicc(int argc, char **argv) { showcrashreports(); + sigaction(SIGINT, &(struct sigaction){.sa_sigaction = OnCtrlC}, NULL); atexit(cleanup); init_macros(); parse_args(argc, argv); @@ -649,6 +661,18 @@ int chibicc(int argc, char **argv) { strarray_push(&ld_args, input); continue; } + // Dox + if (opt_J) { + if (opt_c) { + handle_exit(run_cc1(argc, argv, input, output)); + } else { + char *tmp = create_tmpfile(); + if (run_cc1(argc, argv, input, tmp)) { + strarray_push(&dox_args, tmp); + } + } + continue; + } // Handle .s if (type == FILE_ASM) { if (!opt_S) { @@ -657,6 +681,11 @@ int chibicc(int argc, char **argv) { continue; } assert(type == FILE_C || type == FILE_ASM_CPP); + // Just print ast. 
+ if (opt_A) { + handle_exit(run_cc1(argc, argv, input, NULL)); + continue; + } // Just preprocess if (opt_E || opt_M) { handle_exit(run_cc1(argc, argv, input, NULL)); @@ -674,14 +703,6 @@ int chibicc(int argc, char **argv) { assemble(tmp, output); continue; } - // Dox - if (opt_J) { - char *tmp = create_tmpfile(); - if (run_cc1(argc, argv, input, tmp)) { - strarray_push(&dox_args, tmp); - } - continue; - } // Compile, assemble and link char *tmp1 = create_tmpfile(); char *tmp2 = create_tmpfile(); diff --git a/third_party/chibicc/chibicc.h b/third_party/chibicc/chibicc.h index 0e355049..d9beb355 100644 --- a/third_party/chibicc/chibicc.h +++ b/third_party/chibicc/chibicc.h @@ -272,6 +272,7 @@ struct Obj { bool is_destructor; bool is_constructor; bool is_ms_abi; /* TODO */ + bool is_no_instrument_function; bool is_force_align_arg_pointer; bool is_no_caller_saved_registers; int stack_size; @@ -616,10 +617,16 @@ Obj *alloc_obj(void); Type *alloc_type(void); // -// javadown.c +// dox1.c // void output_javadown(const char *, Obj *); +void output_javadown_asm(const char *, const char *); + +// +// dox2.c +// + void drop_dox(const StringArray *, const char *); COSMOPOLITAN_C_END_ diff --git a/third_party/chibicc/chibicc.mk b/third_party/chibicc/chibicc.mk index a3b7c132..06607bb1 100644 --- a/third_party/chibicc/chibicc.mk +++ b/third_party/chibicc/chibicc.mk @@ -116,15 +116,6 @@ o/$(MODE)/third_party/chibicc/as.com.dbg: \ $(THIRD_PARTY_CHIBICC_A).pkg @$(APELINK) -o/$(MODE)/third_party/chibicc/hello.com.dbg: \ - $(THIRD_PARTY_CHIBICC_A_DEPS) \ - $(THIRD_PARTY_CHIBICC_A) \ - $(APE) \ - $(CRT) \ - o/$(MODE)/third_party/chibicc/hello.chibicc.o \ - $(THIRD_PARTY_CHIBICC_A).pkg - @$(APELINK) - o/$(MODE)/third_party/chibicc/chibicc.o: \ CPPFLAGS += $(THIRD_PARTY_CHIBICC_DEFINES) diff --git a/third_party/chibicc/codegen.c b/third_party/chibicc/codegen.c index 608c72f3..378e995c 100644 --- a/third_party/chibicc/codegen.c +++ b/third_party/chibicc/codegen.c @@ -973,6 +973,12 @@ 
static bool gen_builtin_funcall(Node *node, const char *name) { pop("%rax"); return true; } + } else if (!strcmp(name, "logbl")) { + gen_expr(node->args); + emitlin("\ +\tfxtract\n\ +\tfstp\t%st"); + return true; } else if (!strcmp(name, "isgreater")) { gen_comis(node, "comisd", 1, 0, "a"); return true; @@ -1010,17 +1016,17 @@ static bool gen_builtin_funcall(Node *node, const char *name) { \tflds\t(%rsp)\n\ \tpop\t%rax"); return true; - } else if (!strcmp(name, "inff")) { + } else if (!strcmp(name, "inff") || !strcmp(name, "huge_valf")) { emitlin("\ \tmov\t$0x7f800000,%eax\n\ \tmovd\t%eax,%xmm0"); return true; - } else if (!strcmp(name, "inf")) { + } else if (!strcmp(name, "inf") || !strcmp(name, "huge_val")) { emitlin("\ \tmov\t$0x7ff0000000000000,%rax\n\ \tmovq\t%rax,%xmm0"); return true; - } else if (!strcmp(name, "infl")) { + } else if (!strcmp(name, "infl") || !strcmp(name, "huge_vall")) { emitlin("\ \tpush\t$0x7f800000\n\ \tflds\t(%rsp)\n\ @@ -2304,6 +2310,42 @@ static void store_gp(int r, int offset, int sz) { } } +static void emit_function_hook(void) { + if (opt_nop_mcount) { + print_profiling_nop(); + } else if (opt_fentry) { + emitlin("\tcall\t__fentry__@gotpcrel(%rip)"); + } else if (opt_pg) { + emitlin("\tcall\tmcount@gotpcrel(%rip)"); + } else { + print_profiling_nop(); + } +} + +static void save_caller_saved_registers(void) { + emitlin("\ +\tpush\t%rdi\n\ +\tpush\t%rsi\n\ +\tpush\t%rdx\n\ +\tpush\t%rcx\n\ +\tpush\t%r8\n\ +\tpush\t%r9\n\ +\tpush\t%r10\n\ +\tpush\t%r11"); +} + +static void restore_caller_saved_registers(void) { + emitlin("\ +\tpop\t%r11\n\ +\tpop\t%r10\n\ +\tpop\t%r9\n\ +\tpop\t%r8\n\ +\tpop\t%rcx\n\ +\tpop\t%rdx\n\ +\tpop\t%rsi\n\ +\tpop\t%rdi"); +} + static void emit_text(Obj *prog) { for (Obj *fn = prog; fn; fn = fn->next) { if (!fn->is_function || !fn->is_definition) continue; @@ -2327,14 +2369,8 @@ static void emit_text(Obj *prog) { // Prologue emitlin("\tpush\t%rbp"); emitlin("\tmov\t%rsp,%rbp"); - if (opt_nop_mcount) { - 
print_profiling_nop(); - } else if (opt_fentry) { - emitlin("\tcall\t__fentry__@gotpcrel(%rip)"); - } else if (opt_pg) { - emitlin("\tcall\tmcount@gotpcrel(%rip)"); - } else { - print_profiling_nop(); + if (!fn->is_no_instrument_function) { + emit_function_hook(); } println("\tsub\t$%d,%%rsp", fn->stack_size); println("\tmov\t%%rsp,%d(%%rbp)", fn->alloca_bottom->offset); @@ -2410,15 +2446,7 @@ static void emit_text(Obj *prog) { emitlin("\tand\t$-16,%rsp"); } if (fn->is_no_caller_saved_registers) { - emitlin("\ -\tpush\t%rdi\n\ -\tpush\t%rsi\n\ -\tpush\t%rdx\n\ -\tpush\t%rcx\n\ -\tpush\t%r8\n\ -\tpush\t%r9\n\ -\tpush\t%r10\n\ -\tpush\t%r11"); + save_caller_saved_registers(); } // Emit code gen_stmt(fn->body); @@ -2436,15 +2464,7 @@ static void emit_text(Obj *prog) { emitlin("\tud2"); } else { if (fn->is_no_caller_saved_registers) { - emitlin("\ -\tpop\t%r11\n\ -\tpop\t%r10\n\ -\tpop\t%r9\n\ -\tpop\t%r8\n\ -\tpop\t%rcx\n\ -\tpop\t%rdx\n\ -\tpop\t%rsi\n\ -\tpop\t%rdi"); + restore_caller_saved_registers(); } emitlin("\tleave"); emitlin("\tret"); diff --git a/third_party/chibicc/dox1.c b/third_party/chibicc/dox1.c index a625d4bc..ffefad3a 100644 --- a/third_party/chibicc/dox1.c +++ b/third_party/chibicc/dox1.c @@ -17,7 +17,11 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/runtime/gc.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/prot.h" #include "third_party/chibicc/chibicc.h" +#include "tool/build/lib/asmdown.h" #define APPEND(L) L.p = realloc(L.p, ++L.n * sizeof(*L.p)) @@ -101,30 +105,34 @@ static char *DescribeType(struct Type *ty) { return DescribeScalar(ty, "double"); case TY_LDOUBLE: return DescribeScalar(ty, "long double"); + case TY_FUNC: + return xasprintf("%s(*)()", gc(DescribeType(ty->return_ty))); case TY_PTR: - return xasprintf("%s*", gc(DescribeType(ty->base))); + if (ty->base->kind == TY_FUNC) { + return DescribeType(ty->base); + } else { + return xasprintf("%s*", gc(DescribeType(ty->base))); + } case TY_ARRAY: return xasprintf("%s[%d]", gc(DescribeType(ty->base)), ty->array_len); case TY_ENUM: - if (ty->name_pos) { - return xasprintf("enum %.*s", ty->name_pos->len, ty->name_pos->loc); + if (ty->name) { + return xasprintf("enum %.*s", ty->name->len, ty->name->loc); } else { return strdup("ANONYMOUS-ENUM"); } case TY_STRUCT: - if (ty->name_pos) { - return xasprintf("struct %.*s", ty->name_pos->len, ty->name_pos->loc); + if (ty->name) { + return xasprintf("struct %.*s", ty->name->len, ty->name->loc); } else { return strdup("ANONYMOUS-STRUCT"); } case TY_UNION: - if (ty->name_pos) { - return xasprintf("union %.*s", ty->name_pos->len, ty->name_pos->loc); + if (ty->name) { + return xasprintf("union %.*s", ty->name->len, ty->name->loc); } else { return strdup("ANONYMOUS-UNION"); } - case TY_FUNC: - return xasprintf("%s(*)()", gc(DescribeType(ty->return_ty))); default: return "UNKNOWN"; } @@ -136,6 +144,12 @@ static int CountParams(Obj *params) { return n; } +static int CountMacroParams(struct MacroParam *params) { + int n; + for (n = 0; params; params = params->next) ++n; + return n; +} + static const char *GetFileName(Obj *obj) { if (obj->javadown && obj->javadown->file) 
return obj->javadown->file->name; if (obj->tok && obj->tok->file) return obj->tok->file->name; @@ -155,7 +169,9 @@ static void SerializeDox(struct DoxWriter *dox, Obj *prog) { MacroParam *mparam; SerializeInt(&dox->buf, dox->objects.n); for (i = 0; i < dox->objects.n; ++i) { - s = DescribeType(dox->objects.p[i]->ty); + s = DescribeType(dox->objects.p[i]->is_function + ? dox->objects.p[i]->ty->return_ty + : dox->objects.p[i]->ty); SerializeStr(&dox->buf, s); free(s); SerializeStr(&dox->buf, dox->objects.p[i]->name); @@ -170,7 +186,10 @@ static void SerializeDox(struct DoxWriter *dox, Obj *prog) { SerializeInt(&dox->buf, dox->objects.p[i]->is_force_align_arg_pointer); SerializeInt(&dox->buf, dox->objects.p[i]->is_no_caller_saved_registers); SerializeStr(&dox->buf, dox->objects.p[i]->visibility); - SerializeJavadown(&dox->buf, dox->objects.p[i]->javadown->javadown); + SerializeInt(&dox->buf, !!dox->objects.p[i]->javadown); + if (dox->objects.p[i]->javadown) { + SerializeJavadown(&dox->buf, dox->objects.p[i]->javadown->javadown); + } SerializeInt(&dox->buf, CountParams(dox->objects.p[i]->params)); for (param = dox->objects.p[i]->params; param; param = param->next) { s = DescribeType(param->ty); @@ -184,22 +203,93 @@ static void SerializeDox(struct DoxWriter *dox, Obj *prog) { SerializeStr(&dox->buf, dox->macros.p[i]->name); SerializeStr(&dox->buf, dox->macros.p[i]->javadown->file->name); SerializeInt(&dox->buf, dox->macros.p[i]->javadown->line_no); - SerializeJavadown(&dox->buf, dox->macros.p[i]->javadown->javadown); + SerializeInt(&dox->buf, dox->macros.p[i]->is_objlike); + SerializeStr(&dox->buf, dox->macros.p[i]->va_args_name); + SerializeInt(&dox->buf, !!dox->macros.p[i]->javadown); + if (dox->macros.p[i]->javadown) { + SerializeJavadown(&dox->buf, dox->macros.p[i]->javadown->javadown); + } + SerializeInt(&dox->buf, CountMacroParams(dox->macros.p[i]->params)); + for (mparam = dox->macros.p[i]->params; mparam; mparam = mparam->next) { + SerializeStr(&dox->buf, 
mparam->name); + } } SerializeInt(&dox->buf, 31337); } +static int IsJavadownParam(struct JavadownTag *jt) { + return !strcmp(jt->tag, "param") && strchr(jt->text, ' '); +} + +static char *ExtractJavadownParamName(const char *text) { + char *space; + space = strchr(text, ' '); + return strndup(text, space - text); +} + +static int CountJavadownParams(struct Javadown *jd) { + int i, n; + for (n = i = 0; i < jd->tags.n; ++i) { + if (IsJavadownParam(jd->tags.p + i)) { + ++n; + } + } + return n; +} + +static void SerializeAsmdown(struct DoxWriter *dox, struct Asmdown *ad, + const char *filename) { + char *s; + int i, j; + SerializeInt(&dox->buf, ad->symbols.n); + for (i = 0; i < ad->symbols.n; ++i) { + SerializeStr(&dox->buf, ""); // type + SerializeStr(&dox->buf, ad->symbols.p[i].name); + SerializeStr(&dox->buf, filename); + SerializeInt(&dox->buf, ad->symbols.p[i].line); + SerializeInt(&dox->buf, true); // TODO: is_function + SerializeInt(&dox->buf, false); // TODO: is_weak + SerializeInt(&dox->buf, false); // is_inline + SerializeInt(&dox->buf, false); // is_noreturn + SerializeInt(&dox->buf, false); // is_destructor + SerializeInt(&dox->buf, false); // is_constructor + SerializeInt(&dox->buf, false); // is_force_align_arg_pointer + SerializeInt(&dox->buf, false); // is_no_caller_saved_registers + SerializeStr(&dox->buf, ""); // TODO: visibility + SerializeInt(&dox->buf, true); // has_javadown + SerializeJavadown(&dox->buf, ad->symbols.p[i].javadown); + SerializeInt(&dox->buf, CountJavadownParams(ad->symbols.p[i].javadown)); + for (j = 0; j < ad->symbols.p[i].javadown->tags.n; ++j) { + if (IsJavadownParam(ad->symbols.p[i].javadown->tags.p + j)) { + SerializeStr(&dox->buf, ""); // type + s = ExtractJavadownParamName(ad->symbols.p[i].javadown->tags.p[j].text); + SerializeStr(&dox->buf, s); // name + free(s); + } + } + } + SerializeInt(&dox->buf, 0); // macros + SerializeInt(&dox->buf, 31337); +} + static void LoadPublicDefinitions(struct DoxWriter *dox, Obj *prog) { 
int i; Obj *obj; Macro *macro; for (obj = prog; obj; obj = obj->next) { + if (!obj->javadown) { + if (*obj->name == '_') continue; + if (strchr(obj->name, '$')) continue; + if (startswith(obj->name, "__gdtoa_")) continue; + if (obj->visibility && !strcmp(obj->visibility, "hidden")) continue; + if (!obj->is_definition && (!obj->is_function || !obj->params || + !obj->params->name || !*obj->params->name)) { + continue; + } + } if (obj->is_static) continue; - if (*obj->name == '_') continue; - if (!obj->javadown) continue; if (obj->is_string_literal) continue; - if (obj->visibility && !strcmp(obj->visibility, "hidden")) continue; - if (strchr(obj->name, '$')) continue; + if (obj->section && startswith(obj->section, ".init_array")) continue; APPEND(dox->objects); dox->objects.p[dox->objects.n - 1] = obj; } @@ -209,8 +299,8 @@ static void LoadPublicDefinitions(struct DoxWriter *dox, Obj *prog) { macro = macros.buckets[i].val; if (!macro->javadown) continue; if (!macro->javadown->javadown) continue; - if (*macro->name == '_') continue; - if (strchr(macro->name, '$')) continue; + /* if (*macro->name == '_') continue; */ + /* if (strchr(macro->name, '$')) continue; */ APPEND(dox->macros); dox->macros.p[dox->macros.n - 1] = macro; } @@ -237,7 +327,7 @@ static void WriteDox(struct DoxWriter *dox, const char *path) { } /** - * Emits documentation datum for compilation unit just parsed. + * Emits documentation data for compilation unit just parsed. */ void output_javadown(const char *path, Obj *prog) { struct DoxWriter *dox = NewDoxWriter(); @@ -246,3 +336,30 @@ void output_javadown(const char *path, Obj *prog) { WriteDox(dox, path); FreeDoxWriter(dox); } + +/** + * Emits documentation data for assembly source file. 
+ */ +void output_javadown_asm(const char *path, const char *source) { + int fd; + void *map; + struct stat st; + struct Asmdown *ad; + struct DoxWriter *dox; + CHECK_NE(-1, (fd = open(source, O_RDONLY))); + CHECK_NE(-1, fstat(fd, &st)); + if (st.st_size) { + CHECK_NE(MAP_FAILED, + (map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))); + ad = ParseAsmdown(map, st.st_size); + munmap(map, st.st_size); + } else { + ad = ParseAsmdown("", 0); + } + close(fd); + dox = NewDoxWriter(); + SerializeAsmdown(dox, ad, source); + WriteDox(dox, path); + FreeDoxWriter(dox); + FreeAsmdown(ad); +} diff --git a/third_party/chibicc/dox2.c b/third_party/chibicc/dox2.c index 8fa5ba52..58816977 100644 --- a/third_party/chibicc/dox2.c +++ b/third_party/chibicc/dox2.c @@ -65,10 +65,34 @@ struct Dox { } params; } * p; } objects; - struct { + struct DoxMacros { size_t n; - int *p; - } objectindex; + struct DoxMacro { + bool ignore; + char *name; + char *path; + int line; + bool is_objlike; + char *va_args_name; + struct Javadown *javadown; + struct DoxMacroParams { + size_t n; + struct DoxMacroParam { + char *name; + } * p; + } params; + } * p; + } macros; + struct DoxIndex { + size_t n; + struct DoxIndexEntry { + enum DoxIndexType { + kObject, + kMacro, + } t; + int i; + } * p; + } index; }; static unsigned Hash(const void *p, unsigned long n) { @@ -93,6 +117,8 @@ static void FreeDox(struct Dox *dox) { free(dox->names.p); free(dox->freelist.p); free(dox->objects.p); + free(dox->macros.p); + free(dox->index.p); free(dox); } } @@ -124,18 +150,23 @@ static char *DeserializeStr(struct Dox *dox) { static struct Javadown *DeserializeJavadown(struct Dox *dox) { int i; + bool present; struct Javadown *jd; - jd = FreeLater(dox, calloc(1, sizeof(struct Javadown))); - jd->isfileoverview = DeserializeInt(dox); - jd->title = DeserializeStr(dox); - jd->text = DeserializeStr(dox); - jd->tags.n = DeserializeInt(dox); - jd->tags.p = FreeLater(dox, malloc(jd->tags.n * sizeof(*jd->tags.p))); - for (i 
= 0; i < jd->tags.n; ++i) { - jd->tags.p[i].tag = DeserializeStr(dox); - jd->tags.p[i].text = DeserializeStr(dox); + if (DeserializeInt(dox)) { + jd = FreeLater(dox, calloc(1, sizeof(struct Javadown))); + jd->isfileoverview = DeserializeInt(dox); + jd->title = DeserializeStr(dox); + jd->text = DeserializeStr(dox); + jd->tags.n = DeserializeInt(dox); + jd->tags.p = FreeLater(dox, malloc(jd->tags.n * sizeof(*jd->tags.p))); + for (i = 0; i < jd->tags.n; ++i) { + jd->tags.p[i].tag = DeserializeStr(dox); + jd->tags.p[i].text = DeserializeStr(dox); + } + return jd; + } else { + return NULL; } - return jd; } static void DeserializeObject(struct Dox *dox, struct DoxObject *o) { @@ -163,6 +194,22 @@ static void DeserializeObject(struct Dox *dox, struct DoxObject *o) { } } +static void DeserializeMacro(struct Dox *dox, struct DoxMacro *m) { + int i; + m->ignore = false; + m->name = DeserializeStr(dox); + m->path = DeserializeStr(dox); + m->line = DeserializeInt(dox); + m->is_objlike = DeserializeInt(dox); + m->va_args_name = DeserializeStr(dox); + m->javadown = DeserializeJavadown(dox); + m->params.n = DeserializeInt(dox); + m->params.p = FreeLater(dox, malloc(m->params.n * sizeof(*m->params.p))); + for (i = 0; i < m->params.n; ++i) { + m->params.p[i].name = DeserializeStr(dox); + } +} + static void DeserializeDox(struct Dox *dox) { int i, j, n; i = dox->objects.n; @@ -172,7 +219,16 @@ static void DeserializeDox(struct Dox *dox) { for (j = 0; j < n; ++j) { DeserializeObject(dox, dox->objects.p + i + j); } + i = dox->macros.n; dox->objects.n += n; + n = DeserializeInt(dox); + dox->macros.p = + realloc(dox->macros.p, (dox->macros.n + n) * sizeof(*dox->macros.p)); + for (j = 0; j < n; ++j) { + DeserializeMacro(dox, dox->macros.p + i + j); + } + dox->macros.n += n; + CHECK_EQ(31337, DeserializeInt(dox)); } static void ReadDox(struct Dox *dox, const StringArray *files) { @@ -210,43 +266,66 @@ static bool AddSet(struct Set *set, char *s) { } } -static int CompareObjectNames(const 
void *a, const void *b, void *arg) { - int *i1, *i2; +static int CompareDoxIndexEntry(const void *p1, const void *p2, void *arg) { struct Dox *dox; - i1 = a, i2 = b, dox = arg; - return strcmp(dox->objects.p[*i1].name, dox->objects.p[*i2].name); + const char *s1, *s2; + struct DoxIndexEntry *a, *b; + dox = arg, a = p1, b = p2; + s1 = a->t == kObject ? dox->objects.p[a->i].name : dox->macros.p[a->i].name; + s2 = b->t == kObject ? dox->objects.p[b->i].name : dox->macros.p[b->i].name; + while (*s1 == '_') ++s1; + while (*s2 == '_') ++s2; + return strcasecmp(s1, s2); } static void IndexDox(struct Dox *dox) { size_t i, j, n; - dox->names.n = roundup2pow(dox->objects.n) << 1; + dox->names.n = roundup2pow(dox->objects.n + dox->macros.n) << 1; dox->names.p = calloc(dox->names.n, sizeof(*dox->names.p)); - for (n = i = 0; i < dox->objects.n; ++i) { + n = 0; + for (i = 0; i < dox->objects.n; ++i) { if (AddSet(&dox->names, dox->objects.p[i].name)) { ++n; } else { dox->objects.p[i].ignore = true; } } - dox->objectindex.n = n; - dox->objectindex.p = malloc(n * sizeof(*dox->objectindex.p)); - for (j = i = 0; i < dox->objects.n; ++i) { - if (dox->objects.p[i].ignore) continue; - dox->objectindex.p[j++] = i; + for (i = 0; i < dox->macros.n; ++i) { + if (AddSet(&dox->names, dox->macros.p[i].name)) { + ++n; + } else { + dox->macros.p[i].ignore = true; + } } - qsort_r(dox->objectindex.p, dox->objectindex.n, sizeof(*dox->objectindex.p), - CompareObjectNames, dox); + dox->index.n = n; + dox->index.p = malloc(n * sizeof(*dox->index.p)); + j = 0; + for (i = 0; i < dox->objects.n; ++i) { + if (dox->objects.p[i].ignore) continue; + dox->index.p[j].t = kObject; + dox->index.p[j].i = i; + ++j; + } + for (i = 0; i < dox->macros.n; ++i) { + if (dox->macros.p[i].ignore) continue; + dox->index.p[j].t = kMacro; + dox->index.p[j].i = i; + ++j; + } + CHECK_EQ(n, j); + qsort_r(dox->index.p, dox->index.n, sizeof(*dox->index.p), + CompareDoxIndexEntry, dox); } static void PrintText(FILE *f, const char 
*s) { int c; - bool bol, pre; - for (pre = false, bol = true;;) { + bool bol, pre, ul0, ul2, bt1, bt2; + for (bt1 = bt2 = ul2 = ul0 = pre = false, bol = true;;) { switch ((c = *s++)) { case '\0': - if (pre) { - fprintf(f, ""); - } + if (bt1 || bt2) fprintf(f, ""); + if (pre) fprintf(f, ""); + if (ul0 || ul2) fprintf(f, ""); return; case '&': fprintf(f, "&"); @@ -268,25 +347,79 @@ static void PrintText(FILE *f, const char *s) { fprintf(f, "'"); bol = false; break; - case '\n': - if (!pre && *s == '\n') { + case '`': + if (!pre && !bt1 && !bt2 && *s != '`') { + fprintf(f, ""); + bt1 = true; + } else if (!pre && !bt1 && !bt2 && *s == '`') { + fprintf(f, ""); + bt2 = true; ++s; + } else if (bt1) { + fprintf(f, ""); + bt1 = false; + } else if (bt2 && *s == '`') { + fprintf(f, ""); + bt2 = false; + ++s; + } else { + fprintf(f, "`"); + } + bol = false; + break; + case '\n': + if (!pre && !ul0 && !ul2 && *s == '\n') { fprintf(f, "\n

"); - } else if (pre && + bol = true; + } else if (pre && s[0] != '\n' && (s[0] != ' ' || s[1] != ' ' || s[2] != ' ' || s[3] != ' ')) { fprintf(f, "\n"); pre = false; + bol = true; + } else if (ul0 && s[0] == '-' && s[1] == ' ') { + fprintf(f, "\n

  • "); + s += 2; + bol = false; + } else if (ul2 && s[0] == ' ' && s[1] == ' ' && s[2] == '-' && + s[3] == ' ') { + fprintf(f, "\n
  • "); + s += 4; + bol = false; + } else if (ul0 && s[0] != '\n' && (s[0] != ' ' || s[1] != ' ')) { + fprintf(f, "\n\n"); + bol = true; + ul0 = false; + } else if (ul2 && s[0] != '\n' && + (s[0] != ' ' || s[1] != ' ' || s[2] != ' ' || s[3] != ' ')) { + fprintf(f, "\n\n"); + bol = true; + ul2 = false; } else { fprintf(f, "\n"); + bol = true; } - bol = true; + break; + case '-': + if (bol && !ul0 && !ul2 && s[0] == ' ') { + ul0 = true; + fprintf(f, "
    • "); + } else { + fprintf(f, "-"); + } + bol = false; break; case ' ': if (bol && !pre && s[0] == ' ' && s[1] == ' ' && s[2] == ' ') { pre = true; - fprintf(f, "
      ");
      +          fprintf(f, "
       ");
      +        } else if (bol && !ul0 && !ul2 && s[0] == ' ' && s[1] == '-' &&
      +                   s[2] == ' ') {
      +          ul2 = true;
      +          fprintf(f, "
      • "); + s += 3; + } else { + fprintf(f, " "); } - fprintf(f, " "); bol = false; break; default: @@ -297,83 +430,364 @@ static void PrintText(FILE *f, const char *s) { } } +static bool HasTag(struct Javadown *jd, const char *tag) { + int k; + if (jd) { + for (k = 0; k < jd->tags.n; ++k) { + if (!strcmp(jd->tags.p[k].tag, tag)) { + return true; + } + } + } + return false; +} + +static bool IsNoReturn(struct DoxObject *o) { + return o->is_noreturn || HasTag(o->javadown, "noreturn"); +} + static void PrintDox(struct Dox *dox, FILE *f) { int i, j, k; char *prefix; + bool was_outputted; + struct DoxMacro *m; struct DoxObject *o; + + // header fprintf(f, "\ \n\ +\n\ \n\ +\n\ +\n\ +Cosmopolitan C Library\n\ +\n\ +\n\ +\n\ +\n\ \n\ \n\ -
        \n\ -

        \n\ +

        \n\ + \"honeybadger\"\n\ +

        cosmopolitan libc

        \n\ + build-once run-anywhere c without devops\n\ +
        \n\ +\n\ +\n\ +\n\ +\n\ +\n\ +
        \n\ "); - for (i = 0; i < dox->objectindex.n; ++i) { - o = dox->objects.p + dox->objectindex.p[i]; - if (o->ignore || !o->is_function) continue; + + /* // lefthand index: objects */ + /* fprintf(f, "

        macro objects\n"); */ + /* fprintf(f, "

        \n"); */ + /* for (i = 0; i < dox->index.n; ++i) { */ + /* if (dox->index.p[i].t != kMacro) continue; */ + /* m = dox->macros.p + dox->index.p[i].i; */ + /* if (m->ignore) continue; */ + /* if (!m->is_objlike) continue; */ + /* fprintf(f, "%s
        \n", m->name, m->name); */ + /* } */ + + /* // lefthand index: functions */ + /* fprintf(f, "

        macro functions\n"); */ + /* fprintf(f, "

        \n"); */ + /* for (i = 0; i < dox->index.n; ++i) { */ + /* if (dox->index.p[i].t != kMacro) continue; */ + /* m = dox->macros.p + dox->index.p[i].i; */ + /* if (m->ignore) continue; */ + /* if (m->is_objlike) continue; */ + /* fprintf(f, "%s
        \n", m->name, m->name); */ + /* } */ + + // lefthand index: objects + fprintf(f, "

        objects\n"); + fprintf(f, "

        \n"); + for (i = 0; i < dox->index.n; ++i) { + if (dox->index.p[i].t != kObject) continue; + o = dox->objects.p + dox->index.p[i].i; + if (o->ignore) continue; + if (o->is_function) continue; fprintf(f, "%s
        \n", o->name, o->name); - fprintf(f, "
        \n"); } - fprintf(f, "

        \n"); - for (i = 0; i < dox->objectindex.n; ++i) { - o = dox->objects.p + dox->objectindex.p[i]; - if (o->ignore || !o->is_function) continue; - fprintf(f, "\n
        \n", o->name, o->name); - fprintf(f, "

        ", o->name); - fprintf(f, "%s

        ", o->name); - fprintf(f, "

        "); - PrintText(f, o->javadown->title); - fprintf(f, "\n"); - if (*o->javadown->text) { - fprintf(f, "

        "); - PrintText(f, o->javadown->text); + + // lefthand index: functions + fprintf(f, "

        functions\n"); + fprintf(f, "

        \n"); + for (i = 0; i < dox->index.n; ++i) { + if (dox->index.p[i].t != kObject) continue; + o = dox->objects.p + dox->index.p[i].i; + if (o->ignore) continue; + if (!o->is_function) continue; + fprintf(f, "%s
        \n", o->name, o->name); + } + + // righthand contents + fprintf(f, "

        \n"); + for (i = 0; i < dox->index.n; ++i) { + if (dox->index.p[i].t == kObject) { + o = dox->objects.p + dox->index.p[i].i; + if (o->ignore) continue; fprintf(f, "\n"); - } - fprintf(f, "

        @param\n"); - fprintf(f, "

        \n"); - if (o->params.n) { - fprintf(f, "
        \n"); - for (j = 0; j < o->params.n; ++j) { - fprintf(f, "
        "); - PrintText(f, o->params.p[j].type); - fprintf(f, " "); - PrintText(f, o->params.p[j].name); - fprintf(f, "\n"); - prefix = xasprintf("%s ", o->params.p[j].name); - for (k = 0; k < o->javadown->tags.n; ++k) { - if (!strcmp(o->javadown->tags.p[k].tag, "param") && - startswith(o->javadown->tags.p[k].text, prefix)) { - fprintf(f, "
        "); - PrintText(f, o->javadown->tags.p[k].text + strlen(prefix)); - fprintf(f, "\n"); - break; - } - } - free(prefix); - } - fprintf(f, "
        \n"); - } else { - fprintf(f, "

        None.\n"); - } - fprintf(f, "

        \n"); - for (k = 0; k < o->javadown->tags.n; ++k) { - if (!strcmp(o->javadown->tags.p[k].tag, "param")) continue; - fprintf(f, "

        @"); - PrintText(f, o->javadown->tags.p[k].tag); - fprintf(f, "\n"); - if (*o->javadown->tags.p[k].text) { - PrintText(f, o->javadown->tags.p[k].text); + if (i) fprintf(f, "


        "); + fprintf(f, "
        \n", o->name); + fprintf(f, "

        %s

        ", o->name, o->name); + + // title + if (o->javadown && *o->javadown->title) { + fprintf(f, "

        "); + PrintText(f, o->javadown->title); fprintf(f, "\n"); } + + // text + if (o->javadown && *o->javadown->text) { + fprintf(f, "

        "); + PrintText(f, o->javadown->text); + fprintf(f, "\n"); + } + + // parameters + if (o->is_function && (o->params.n || HasTag(o->javadown, "param"))) { + fprintf(f, "

        \n"); + fprintf(f, "@param\n"); + fprintf(f, "
        \n"); + if (o->params.n) { + for (j = 0; j < o->params.n; ++j) { + fprintf(f, "
        "); + PrintText(f, o->params.p[j].type); + fprintf(f, " "); + PrintText(f, o->params.p[j].name); + fprintf(f, "\n"); + if (o->javadown) { + prefix = xasprintf("%s ", o->params.p[j].name); + for (k = 0; k < o->javadown->tags.n; ++k) { + if (!strcmp(o->javadown->tags.p[k].tag, "param") && + startswith(o->javadown->tags.p[k].text, prefix)) { + fprintf(f, "
        "); + PrintText(f, o->javadown->tags.p[k].text + strlen(prefix)); + fprintf(f, "\n"); + break; + } + } + free(prefix); + } + } + } else { + for (k = 0; k < o->javadown->tags.n; ++k) { + if (!strcmp(o->javadown->tags.p[k].tag, "param")) { + fprintf(f, "
        "); + PrintText(f, o->javadown->tags.p[k].text); + fprintf(f, "\n"); + break; + } + } + } + fprintf(f, "
        \n"); + fprintf(f, "
        \n"); // .tag + } + + // return + if (o->is_function) { + fprintf(f, "
        \n"); + if (IsNoReturn(o)) { + fprintf(f, "@noreturn\n"); + } else { + fprintf(f, "@return\n"); + was_outputted = false; + fprintf(f, "
        \n"); + if (o->javadown) { + for (k = 0; k < o->javadown->tags.n; ++k) { + if (strcmp(o->javadown->tags.p[k].tag, "return")) continue; + if (!was_outputted) { + fprintf(f, "
        "); + PrintText(f, o->type); + was_outputted = true; + } + fprintf(f, "\n
        "); + PrintText(f, o->javadown->tags.p[k].text); + fprintf(f, "\n"); + } + } + if (!was_outputted) { + fprintf(f, "
        "); + PrintText(f, o->type); + } + fprintf(f, "
        \n"); + fprintf(f, "
        \n"); // .tag + } + } + + // tags + if (o->javadown) { + for (k = 0; k < o->javadown->tags.n; ++k) { + if (!strcmp(o->javadown->tags.p[k].tag, "param")) continue; + if (!strcmp(o->javadown->tags.p[k].tag, "return")) continue; + if (!strcmp(o->javadown->tags.p[k].tag, "noreturn")) continue; + fprintf(f, "
        \n"); + fprintf(f, "@"); + PrintText(f, o->javadown->tags.p[k].tag); + fprintf(f, "\n"); + if (*o->javadown->tags.p[k].text) { + PrintText(f, o->javadown->tags.p[k].text); + fprintf(f, "\n"); + } + fprintf(f, "
        \n"); // .tag + } + } + + // sauce + if (strcmp(o->path, "missingno.c")) { + fprintf(f, "
        \n"); + fprintf(f, + "@see %s", + o->path, o->line, o->path); + fprintf(f, "
        \n"); // .tag + } + + fprintf(f, "
        \n"); /* class=".api" */ + } else { + continue; + m = dox->macros.p + dox->index.p[i].i; + if (m->ignore) continue; + fprintf(f, "\n"); + if (i) fprintf(f, "
        "); + fprintf(f, "
        \n", m->name); + fprintf(f, "

        %s

        ", m->name, m->name); + + // title + if (m->javadown && *m->javadown->title) { + fprintf(f, "

        "); + PrintText(f, m->javadown->title); + fprintf(f, "\n"); + } + + // text + if (m->javadown && *m->javadown->text) { + fprintf(f, "

        "); + PrintText(f, m->javadown->text); + fprintf(f, "\n"); + } + + // parameters + if (!m->is_objlike && (m->params.n || HasTag(m->javadown, "param"))) { + fprintf(f, "

        \n"); + fprintf(f, "@param\n"); + fprintf(f, "
        \n"); + if (m->params.n) { + for (j = 0; j < m->params.n; ++j) { + fprintf(f, "
        "); + fprintf(f, ""); + PrintText(f, m->params.p[j].name); + fprintf(f, "\n"); + if (m->javadown) { + prefix = xasprintf("%s ", m->params.p[j].name); + for (k = 0; k < m->javadown->tags.n; ++k) { + if (!strcmp(m->javadown->tags.p[k].tag, "param") && + startswith(m->javadown->tags.p[k].text, prefix)) { + fprintf(f, "
        "); + PrintText(f, m->javadown->tags.p[k].text + strlen(prefix)); + fprintf(f, "\n"); + break; + } + } + free(prefix); + } + } + } else { + for (k = 0; k < m->javadown->tags.n; ++k) { + if (!strcmp(m->javadown->tags.p[k].tag, "param")) { + fprintf(f, "
        "); + PrintText(f, m->javadown->tags.p[k].text); + fprintf(f, "\n"); + break; + } + } + } + fprintf(f, "
        \n"); + fprintf(f, "
        \n"); // .tag + } + + fprintf(f, "
        \n"); /* class=".api" */ } - fprintf(f, "\n"); } fprintf(f, "
        \n"); + + // footer + fprintf(f, "\ +\n\ +\n\ +"); } /** diff --git a/third_party/chibicc/parse.c b/third_party/chibicc/parse.c index 0818f2d1..006db9b8 100644 --- a/third_party/chibicc/parse.c +++ b/third_party/chibicc/parse.c @@ -57,6 +57,7 @@ typedef struct { bool is_destructor; bool is_constructor; bool is_externally_visible; + bool is_no_instrument_function; bool is_force_align_arg_pointer; bool is_no_caller_saved_registers; int align; @@ -476,6 +477,10 @@ static Token *thing_attributes(Token *tok, void *arg) { attr->is_externally_visible = true; return tok; } + if (consume_attribute(&tok, tok, "no_instrument_function")) { + attr->is_no_instrument_function = true; + return tok; + } if (consume_attribute(&tok, tok, "force_align_arg_pointer")) { attr->is_force_align_arg_pointer = true; return tok; @@ -555,7 +560,6 @@ static Token *thing_attributes(Token *tok, void *arg) { consume_attribute(&tok, tok, "no_split_stack") || consume_attribute(&tok, tok, "no_stack_limit") || consume_attribute(&tok, tok, "no_sanitize_undefined") || - consume_attribute(&tok, tok, "no_instrument_function") || consume_attribute(&tok, tok, "no_profile_instrument_function")) { return tok; } @@ -1018,6 +1022,7 @@ static Type *enum_specifier(Token **rest, Token *tok) { *rest = tok; return ty; } + ty->name = tag; tok = skip(tok, '{'); // Read an enum-list. int i = 0; @@ -2066,8 +2071,9 @@ int64_t eval2(Node *node, char ***label) { } error_tok(node->tok, "not a compile-time constant"); } - if (node->var->ty->kind != TY_ARRAY && node->var->ty->kind != TY_FUNC) + if (node->var->ty->kind != TY_ARRAY && node->var->ty->kind != TY_FUNC) { error_tok(node->tok, "invalid initializer"); + } *label = &node->var->name; return 0; case ND_NUM: @@ -2727,6 +2733,7 @@ static Type *struct_union_decl(Token **rest, Token *tok) { push_tag_scope(tag, ty); return ty; } + ty->name = tag; tok = skip(tok, '{'); // Construct a struct object. 
struct_members(&tok, tok, ty); @@ -3361,10 +3368,12 @@ static Obj *find_func(char *name) { } static void mark_live(Obj *var) { + int i; + Obj *fn; if (!var->is_function || var->is_live) return; var->is_live = true; - for (int i = 0; i < var->refs.len; i++) { - Obj *fn = find_func(var->refs.data[i]); + for (i = 0; i < var->refs.len; i++) { + fn = find_func(var->refs.data[i]); if (fn) mark_live(fn); } } @@ -3385,25 +3394,28 @@ static Token *function(Token *tok, Type *basety, VarAttr *attr) { fn->is_definition = fn->is_definition || EQUAL(tok, "{"); fn->is_weak |= attr->is_weak; fn->is_noreturn |= attr->is_noreturn; + fn->tok = ty->name; } else { fn = new_gvar(name_str, ty); + fn->tok = ty->name; fn->is_function = true; fn->is_definition = EQUAL(tok, "{"); fn->is_static = attr->is_static || (attr->is_inline && !attr->is_extern); fn->is_inline = attr->is_inline; - fn->is_weak = attr->is_weak; - fn->is_ms_abi = attr->is_ms_abi; - fn->is_aligned = attr->is_aligned; - fn->is_noreturn = attr->is_noreturn; - fn->is_destructor = attr->is_destructor; - fn->is_constructor = attr->is_constructor; - fn->is_externally_visible = attr->is_externally_visible; - fn->is_force_align_arg_pointer = attr->is_force_align_arg_pointer; - fn->is_no_caller_saved_registers = attr->is_no_caller_saved_registers; - fn->align = attr->align; - fn->section = attr->section; - fn->visibility = attr->visibility; } + fn->align = MAX(fn->align, attr->align); + fn->is_weak |= attr->is_weak; + fn->section = fn->section ?: attr->section; + fn->is_ms_abi |= attr->is_ms_abi; + fn->visibility = fn->visibility ?: attr->visibility; + fn->is_aligned |= attr->is_aligned; + fn->is_noreturn |= attr->is_noreturn; + fn->is_destructor |= attr->is_destructor; + fn->is_constructor |= attr->is_constructor; + fn->is_externally_visible |= attr->is_externally_visible; + fn->is_no_instrument_function |= attr->is_no_instrument_function; + fn->is_force_align_arg_pointer |= attr->is_force_align_arg_pointer; + 
fn->is_no_caller_saved_registers |= attr->is_no_caller_saved_registers; fn->javadown = fn->javadown ?: current_javadown; fn->is_root = !(fn->is_static && fn->is_inline); if (consume_attribute(&tok, tok, "asm")) { @@ -3452,6 +3464,7 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) { Type *ty = declarator(&tok, tok, basety); if (!ty->name) error_tok(ty->name_pos, "variable name omitted"); Obj *var = new_gvar(get_ident(ty->name), ty); + if (!var->tok) var->tok = ty->name; var->javadown = current_javadown; if (consume_attribute(&tok, tok, "asm")) { tok = skip(tok, '('); @@ -3459,9 +3472,16 @@ static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) { tok = skip(tok, ')'); } tok = attribute_list(tok, attr, thing_attributes); + var->align = MAX(var->align, attr->align); + var->is_weak = attr->is_weak; + var->section = attr->section; + var->visibility = attr->visibility; + var->is_aligned = var->is_aligned | attr->is_aligned; + var->is_externally_visible = attr->is_externally_visible; var->is_definition = !attr->is_extern; var->is_static = attr->is_static; var->is_tls = attr->is_tls; + var->section = attr->section; if (attr->align) var->align = attr->align; if (EQUAL(tok, "=")) { gvar_initializer(&tok, tok->next, var); @@ -3535,15 +3555,30 @@ static Obj *declare3(char *s, Type *r, Type *a, Type *b, Type *c) { return new_gvar(xstrcat("__builtin_", s), ty); } +static void math0(char *name) { + declare0(name, ty_double); + declare0(xstrcat(name, 'f'), ty_float); + declare0(xstrcat(name, 'l'), ty_ldouble); +} + +static void math1(char *name) { + declare1(name, ty_double, ty_double); + declare1(xstrcat(name, 'f'), ty_float, ty_float); + declare1(xstrcat(name, 'l'), ty_ldouble, ty_ldouble); +} + +static void math2(char *name) { + declare2(name, ty_double, ty_double, ty_double); + declare2(xstrcat(name, 'f'), ty_float, ty_float, ty_float); + declare2(xstrcat(name, 'l'), ty_ldouble, ty_ldouble, ty_ldouble); +} + void 
declare_builtin_functions(void) { Type *pvoid = pointer_to(ty_void); Type *pchar = pointer_to(ty_char); builtin_alloca = declare1("alloca", pointer_to(ty_void), ty_int); declare0("trap", ty_int); declare0("unreachable", ty_int); - declare0("inff", ty_float); - declare0("inf", ty_double); - declare0("infl", ty_ldouble); declare1("ctz", ty_int, ty_int); declare1("ctzl", ty_long, ty_long); declare1("ctzll", ty_long, ty_long); @@ -3581,6 +3616,16 @@ void declare_builtin_functions(void) { declare2("strchr", pchar, pchar, ty_int); declare2("strstr", pchar, pchar, pchar); declare1("frame_address", pvoid, ty_int); + declare2("scalbnf", ty_float, ty_float, ty_int); + declare2("scalbn", ty_double, ty_double, ty_int); + declare2("scalbnl", ty_ldouble, ty_ldouble, ty_int); + math0("inf"); + math0("huge_val"); + math1("fabs"); + math1("logb"); + math2("fmax"); + math2("fmin"); + math2("copysign"); } // program = (typedef | function-definition | global-variable)* diff --git a/third_party/chibicc/printast.c b/third_party/chibicc/printast.c index 6c46bb90..a8dcdd7c 100644 --- a/third_party/chibicc/printast.c +++ b/third_party/chibicc/printast.c @@ -119,7 +119,6 @@ static void PrintType(FILE *f, int l, const char *s, Type *t) { PrintInt(f, l + 2, "align: ", t->align); PrintBool(f, l + 2, "is_unsigned: ", t->is_unsigned); PrintBool(f, l + 2, "is_atomic: ", t->is_atomic); - PrintType(f, l + 2, "origin: ", t->origin); PrintType(f, l + 2, "base: ", t->base); PrintTokStr(f, l + 2, "name: ", t->name); PrintTokStr(f, l + 2, "name_pos: ", t->name_pos); @@ -231,6 +230,13 @@ static void PrintObj(FILE *f, int l, const char *s, Obj *o) { PrintBool(f, l + 2, "is_noreturn: ", o->is_noreturn); PrintBool(f, l + 2, "is_destructor: ", o->is_destructor); PrintBool(f, l + 2, "is_constructor: ", o->is_constructor); + PrintBool(f, l + 2, "is_externally_visible: ", o->is_externally_visible); + PrintBool(f, l + 2, + "is_no_instrument_function: ", o->is_no_instrument_function); + PrintBool(f, l + 2, + 
"is_force_align_arg_pointer: ", o->is_force_align_arg_pointer); + PrintBool(f, l + 2, + "is_no_caller_saved_registers: ", o->is_no_caller_saved_registers); PrintInt(f, l + 2, "stack_size: ", o->stack_size); PrintObj(f, l + 2, "params: ", o->params); PrintNode(f, l + 2, "body: ", o->body); diff --git a/third_party/chibicc/test/builtin_test.c b/third_party/chibicc/test/builtin_test.c index b94debbc..55e81813 100644 --- a/third_party/chibicc/test/builtin_test.c +++ b/third_party/chibicc/test/builtin_test.c @@ -1,3 +1,4 @@ +#include "libc/math.h" #include "third_party/chibicc/test/test.h" #define FPNAN 0 @@ -115,6 +116,31 @@ void test_fpclassify(void) { ASSERT(FPNAN, FPCLASSIFY(__builtin_nanl(""))); } +void test_logb(void) { + ASSERT(6, __builtin_logbl(123.456)); + ASSERT(logbl(123.456L), __builtin_logbl(123.456L)); + ASSERT(logbl(__LDBL_MIN__), __builtin_logbl(__LDBL_MIN__)); + ASSERT(logbl(__LDBL_MAX__), __builtin_logbl(__LDBL_MAX__)); +} + +void test_fmax(void) { + ASSERT(fmaxl(1, 2), __builtin_fmaxl(1, 2)); + ASSERT(2, __builtin_fmaxl(__builtin_nanl(""), 2)); + ASSERT(1, __builtin_fmaxl(1, __builtin_nanl(""))); + ASSERT(2, fmaxl(nanl(""), 2)); + ASSERT(1, fmaxl(1, nanl(""))); + ASSERT(fmaxf(1, 2), __builtin_fmaxf(1, 2)); + ASSERT(2, __builtin_fmaxf(__builtin_nanl(""), 2)); + ASSERT(1, __builtin_fmaxf(1, __builtin_nanl(""))); + ASSERT(2, fmaxf(nanl(""), 2)); + ASSERT(1, fmaxf(1, nanl(""))); + ASSERT(fmax(1, 2), __builtin_fmax(1, 2)); + ASSERT(2, __builtin_fmax(__builtin_nanl(""), 2)); + ASSERT(1, __builtin_fmax(1, __builtin_nanl(""))); + ASSERT(2, fmax(nanl(""), 2)); + ASSERT(1, fmax(1, nanl(""))); +} + void test_strlen(void) { ASSERT(5, strlen("hello")); ASSERT(5, __builtin_strlen("hello")); @@ -414,5 +440,7 @@ int main() { test_strchr(); test_strpbrk(); test_strstr(); + test_logb(); + test_fmax(); return 0; } diff --git a/third_party/chibicc/test/test.mk b/third_party/chibicc/test/test.mk index dacc120a..73a27230 100644 --- a/third_party/chibicc/test/test.mk +++ 
b/third_party/chibicc/test/test.mk @@ -49,6 +49,7 @@ THIRD_PARTY_CHIBICC_TEST_DIRECTDEPS = \ LIBC_NEXGEN32E \ LIBC_UNICODE \ LIBC_MEM \ + LIBC_TINYMATH \ LIBC_X \ THIRD_PARTY_CHIBICC \ THIRD_PARTY_COMPILER_RT diff --git a/third_party/compiler_rt/comparedf2.c b/third_party/compiler_rt/comparedf2.c index 6912f490..6e36614f 100644 --- a/third_party/compiler_rt/comparedf2.c +++ b/third_party/compiler_rt/comparedf2.c @@ -83,10 +83,11 @@ __ledf2(fp_t a, fp_t b) { } } -#if defined(__ELF__) // Alias for libgcc compatibility -FNALIAS(__cmpdf2, __ledf2); -#endif +COMPILER_RT_ABI enum LE_RESULT +__cmpdf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} enum GE_RESULT { GE_LESS = -1, diff --git a/third_party/compiler_rt/comparesf2.c b/third_party/compiler_rt/comparesf2.c index bf1ad804..008da545 100644 --- a/third_party/compiler_rt/comparesf2.c +++ b/third_party/compiler_rt/comparesf2.c @@ -83,10 +83,11 @@ __lesf2(fp_t a, fp_t b) { } } -#if defined(__ELF__) // Alias for libgcc compatibility -FNALIAS(__cmpsf2, __lesf2); -#endif +COMPILER_RT_ABI enum LE_RESULT +__cmpsf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} enum GE_RESULT { GE_LESS = -1, diff --git a/third_party/compiler_rt/comparetf2.c b/third_party/compiler_rt/comparetf2.c index 9fe48521..b448dc22 100644 --- a/third_party/compiler_rt/comparetf2.c +++ b/third_party/compiler_rt/comparetf2.c @@ -52,18 +52,14 @@ enum LE_RESULT { }; COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { - const srep_t aInt = toRep(a); const srep_t bInt = toRep(b); const rep_t aAbs = aInt & absMask; const rep_t bAbs = bInt & absMask; - // If either a or b is NaN, they are unordered. if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - // If a and b are both zeros, they are equal. if ((aAbs | bAbs) == 0) return LE_EQUAL; - // If at least one of a and b is positive, we get the same result comparing // a and b as signed integers as we would with a floating-point compare. 
if ((aInt & bInt) >= 0) { @@ -82,10 +78,10 @@ COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { } } -#if defined(__ELF__) // Alias for libgcc compatibility -FNALIAS(__cmptf2, __letf2); -#endif +COMPILER_RT_ABI enum LE_RESULT __cmptf2(fp_t a, fp_t b) { + return __letf2(a, b); +} enum GE_RESULT { GE_LESS = -1, diff --git a/third_party/compiler_rt/int_math.h b/third_party/compiler_rt/int_math.h index 721478ab..7425b695 100644 --- a/third_party/compiler_rt/int_math.h +++ b/third_party/compiler_rt/int_math.h @@ -45,7 +45,7 @@ # define crt_isfinite(x) __builtin_isfinite((x)) #elif defined(__GNUC__) # define crt_isfinite(x) \ - __extension__(({ \ + (({ \ __typeof((x)) x_ = (x); \ !crt_isinf(x_) && !crt_isnan(x_); \ })) diff --git a/third_party/compiler_rt/int_types.h b/third_party/compiler_rt/int_types.h index 02654e29..dd6c7382 100644 --- a/third_party/compiler_rt/int_types.h +++ b/third_party/compiler_rt/int_types.h @@ -63,8 +63,8 @@ typedef union } udwords; #ifdef CRT_HAS_128BIT -typedef int ti_int __attribute__ ((mode (TI))); -typedef unsigned tu_int __attribute__ ((mode (TI))); +typedef __int128 ti_int; +typedef unsigned __int128 tu_int; typedef union { @@ -141,7 +141,7 @@ typedef union long double f; } long_double_bits; -#if __STDC_VERSION__ >= 199901L +#if __STDC_VERSION__ >= 199901L && !defined(__STDC_NO_COMPLEX__) typedef float _Complex Fcomplex; typedef double _Complex Dcomplex; typedef long double _Complex Lcomplex; diff --git a/third_party/compiler_rt/udivmodti4.c b/third_party/compiler_rt/udivmodti4.c index 5acefffd..8c240e10 100644 --- a/third_party/compiler_rt/udivmodti4.c +++ b/third_party/compiler_rt/udivmodti4.c @@ -71,7 +71,7 @@ forceinline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v, forceinline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, du_int *r) { #ifdef __x86_64__ du_int result; - asm("div\t%2" : "=a"(result), "=d"(*r) : "r"(v), "a"(u0), "d"(u1) : "cc"); + asm("div\t%2" : "=a"(result), "=d"(*r) : "r"(v), "0"(u0), 
"1"(u1) : "cc"); return result; #else return udiv128by64to64default(u1, u0, v, r); diff --git a/third_party/dlmalloc/dlindependent_calloc.c b/third_party/dlmalloc/dlindependent_calloc.c index cab3b1df..c94f442e 100644 --- a/third_party/dlmalloc/dlindependent_calloc.c +++ b/third_party/dlmalloc/dlindependent_calloc.c @@ -146,20 +146,20 @@ static void **ialloc(mstate m, size_t n_elements, size_t *sizes, int opts, * but the number is not known at compile time, and some of the nodes * may later need to be freed. For example: * - * struct Node { int item; struct Node* next; }; - * struct Node* build_list() { - * struct Node **pool; - * int n = read_number_of_nodes_needed(); - * if (n <= 0) return 0; - * pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); - * if (pool == 0) __die(); - * // organize into a linked list... - * struct Node* first = pool[0]; - * for (i = 0; i < n-1; ++i) - * pool[i]->next = pool[i+1]; - * free(pool); * // Can now free the array (or not, if it is needed later) - * return first; - * } + * struct Node { int item; struct Node* next; }; + * struct Node* build_list() { + * struct Node **pool; + * int n = read_number_of_nodes_needed(); + * if (n <= 0) return 0; + * pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + * if (pool == 0) __die(); + * // organize into a linked list... + * struct Node* first = pool[0]; + * for (i = 0; i < n-1; ++i) + * pool[i]->next = pool[i+1]; + * free(pool); * // Can now free the array (or not, if it is needed later) + * return first; + * } */ void **dlindependent_calloc(size_t n_elements, size_t elem_size, void *chunks[]) { @@ -199,19 +199,18 @@ void **dlindependent_calloc(size_t n_elements, size_t elem_size, * where several structs or objects must always be allocated at the * same time. For example: * - * struct Head { ... } - * struct Foot { ... 
} - * - * void send_message(char* msg) { - * int msglen = strlen(msg); - * size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; - * void* chunks[3]; - * if (independent_comalloc(3, sizes, chunks) == 0) __die(); - * struct Head* head = (struct Head*)(chunks[0]); - * char* body = (char*)(chunks[1]); - * struct Foot* foot = (struct Foot*)(chunks[2]); - * // ... - * } + * struct Head { ... } + * struct Foot { ... } + * void send_message(char* msg) { + * int msglen = strlen(msg); + * size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + * void* chunks[3]; + * if (independent_comalloc(3, sizes, chunks) == 0) __die(); + * struct Head* head = (struct Head*)(chunks[0]); + * char* body = (char*)(chunks[1]); + * struct Foot* foot = (struct Foot*)(chunks[2]); + * // ... + * } * * In general though, independent_comalloc is worth using only for * larger values of n_elements. For small values, you probably won't diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 7a1085b2..a0bc5200 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -1,5 +1,5 @@ #include "libc/bits/initializer.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/internal.h" #include "libc/calls/struct/sysinfo.h" #include "libc/dce.h" @@ -21,8 +21,8 @@ STATIC_YOINK("_init_dlmalloc"); #define OOM_WARNING "warning: running out of physical memory\n" #define is_global(M) ((M) == g_dlmalloc) -struct MallocState g_dlmalloc[1]; -struct MallocParams g_mparams; +hidden struct MallocState g_dlmalloc[1]; +hidden struct MallocParams g_mparams; /** * Acquires more system memory for dlmalloc. 
diff --git a/third_party/dlmalloc/malloc_trim.c b/third_party/dlmalloc/malloc_trim.c index a2657b18..caf6fe75 100644 --- a/third_party/dlmalloc/malloc_trim.c +++ b/third_party/dlmalloc/malloc_trim.c @@ -3,7 +3,7 @@ /** * If possible, gives memory back to the system (via negative arguments - * to sbrk) if there is unused memory at the `high' end of the malloc + * to sbrk) if there is unused memory at the `high` end of the malloc * pool or in unused MMAP segments. You can call this after freeing * large blocks of memory to potentially reduce the system-level memory * requirements of a program. However, it cannot guarantee to reduce @@ -11,7 +11,7 @@ * memory will be locked between two used chunks, so they cannot be * given back to the system. * - * The `pad' argument to malloc_trim represents the amount of free + * The `pad` argument to malloc_trim represents the amount of free * trailing space to leave untrimmed. If this argument is zero, only the * minimum amount of memory to maintain internal data structures will be * left. Non-zero arguments can be supplied to maintain enough trailing diff --git a/third_party/dlmalloc/mallopt.c b/third_party/dlmalloc/mallopt.c index 67fe25ff..a7ab0d60 100644 --- a/third_party/dlmalloc/mallopt.c +++ b/third_party/dlmalloc/mallopt.c @@ -13,10 +13,10 @@ * use in this malloc, so setting them has no effect. 
But this malloc * also supports other options in mallopt: * - * Symbol param # default allowed param values - * M_TRIM_THRESHOLD -1 2*1024*1024 any (-1U disables trimming) - * M_GRANULARITY -2 page size any power of 2 >= page size - * M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) + * Symbol param # default allowed param values + * M_TRIM_THRESHOLD -1 2*1024*1024 any (-1U disables trimming) + * M_GRANULARITY -2 page size any power of 2 >= page size + * M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) */ bool32 mallopt(int param_number, int value) { size_t val; diff --git a/third_party/gdtoa/README b/third_party/gdtoa/README index 220ecd9a..1491696f 100644 --- a/third_party/gdtoa/README +++ b/third_party/gdtoa/README @@ -50,8 +50,8 @@ two letters: and software emulations of Motorola 68xxx chips that do not pad the way the 68xxx does, but only store 80 bits - xL IEEE extended precision, as on Motorola 68xxx chips - Q quad precision, as on Sun Sparc chips + xL IEEE extended precision, as on Motorola 68xxx chips [jart: removed] + Q quad precision, as on Sun Sparc chips [jart: removed] dd double double, pairs of IEEE double numbers whose sum is the desired value diff --git a/third_party/gdtoa/dmisc.c b/third_party/gdtoa/dmisc.c index 9ddd5be1..59057ff9 100644 --- a/third_party/gdtoa/dmisc.c +++ b/third_party/gdtoa/dmisc.c @@ -36,12 +36,8 @@ THIS SOFTWARE. char *dtoa_result; #endif - char * -#ifdef KR_headers -rv_alloc(i MTa) int i; MTk -#else +char * rv_alloc(int i MTd) -#endif { int j, k, *r; @@ -59,12 +55,8 @@ rv_alloc(int i MTd) (char *)(r+1); } - char * -#ifdef KR_headers -nrv_alloc(s, rve, n MTa) char *s, **rve; int n; MTk -#else +char * nrv_alloc(char *s, char **rve, int n MTd) -#endif { char *rv, *t; @@ -82,12 +74,8 @@ nrv_alloc(char *s, char **rve, int n MTd) * when MULTIPLE_THREADS is not defined. 
*/ - void -#ifdef KR_headers -freedtoa(s) char *s; -#else +void freedtoa(char *s) -#endif { #ifdef MULTIPLE_THREADS ThInfo *TI = 0; @@ -101,13 +89,8 @@ freedtoa(char *s) #endif } - int -quorem -#ifdef KR_headers - (b, S) Bigint *b, *S; -#else - (Bigint *b, Bigint *S) -#endif +int +quorem(Bigint *b, Bigint *S) { int n; ULong *bx, *bxe, q, *sx, *sxe; diff --git a/third_party/gdtoa/dtoa.c b/third_party/gdtoa/dtoa.c index 65892671..7884a8f8 100644 --- a/third_party/gdtoa/dtoa.c +++ b/third_party/gdtoa/dtoa.c @@ -73,14 +73,8 @@ THIS SOFTWARE. #define Rounding Flt_Rounds #endif - char * -dtoa -#ifdef KR_headers - (d0, mode, ndigits, decpt, sign, rve) - double d0; int mode, ndigits, *decpt, *sign; char **rve; -#else - (double d0, int mode, int ndigits, int *decpt, int *sign, char **rve) -#endif +char * +dtoa(double d0, int mode, int ndigits, int *decpt, int *sign, char **rve) { /* Arguments ndigits, decpt, sign are similar to those of ecvt and fcvt; trailing zeros are suppressed from diff --git a/third_party/gdtoa/g_Qfmt.c b/third_party/gdtoa/g_Qfmt.c deleted file mode 100644 index cd3f09e4..00000000 --- a/third_party/gdtoa/g_Qfmt.c +++ /dev/null @@ -1,120 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. - -Copyright (C) 1998, 2000 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. 
- -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). */ - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#define _3 3 -#endif -#ifdef IEEE_8087 -#define _0 3 -#define _1 2 -#define _2 1 -#define _3 0 -#endif - - char* -#ifdef KR_headers -g_Qfmt(buf, V, ndig, bufsize) char *buf; char *V; int ndig; size_t bufsize; -#else -g_Qfmt(char *buf, void *V, int ndig, size_t bufsize) -#endif -{ - static const FPI fpi0 = { 113, 1-16383-113+1, 32766 - 16383 - 113 + 1, 1, 0, Int_max }; - char *b, *s, *se; - ULong bits[4], *L, sign; - int decpt, ex, i, mode; -#ifdef Honor_FLT_ROUNDS -#include "third_party/gdtoa/gdtoa_fltrnds.inc" -#else -#define fpi &fpi0 -#endif - - if (ndig < 0) - ndig = 0; - if (bufsize < (size_t)(ndig + 10)) - return 0; - - L = (ULong*)V; - sign = L[_0] & 0x80000000L; - bits[3] = L[_0] & 0xffff; - bits[2] = L[_1]; - bits[1] = L[_2]; - bits[0] = L[_3]; - b = buf; - if ( (ex = (L[_0] & 0x7fff0000L) >> 16) !=0) { - if (ex == 0x7fff) { - /* Infinity or NaN */ - if (bits[0] | bits[1] | bits[2] | bits[3]) - b = strcp(b, "NaN"); - else { - b = buf; - if (sign) - *b++ = '-'; - b = strcp(b, "Infinity"); - } - return b; - } - i = STRTOG_Normal; - bits[3] |= 0x10000; - } - else if (bits[0] | bits[1] | bits[2] | bits[3]) { - i = STRTOG_Denormal; - ex = 1; - } - else 
{ -#ifndef IGNORE_ZERO_SIGN - if (sign) - *b++ = '-'; -#endif - *b++ = '0'; - *b = 0; - return b; - } - ex -= 0x3fff + 112; - mode = 2; - if (ndig <= 0) { - if (bufsize < 48) - return 0; - mode = 0; - } - s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se); - return g__fmt(buf, s, se, decpt, sign, bufsize); - } diff --git a/third_party/gdtoa/g_Qfmt_p.c b/third_party/gdtoa/g_Qfmt_p.c deleted file mode 100644 index 181ae8c0..00000000 --- a/third_party/gdtoa/g_Qfmt_p.c +++ /dev/null @@ -1,133 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. - -Copyright (C) 1998, 2000 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - - extern ULong NanDflt_Q_D2A[4]; - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#define _3 3 -#endif -#ifdef IEEE_8087 -#define _0 3 -#define _1 2 -#define _2 1 -#define _3 0 -#endif - - char* -#ifdef KR_headers -g_Qfmt_p(buf, V, ndig, bufsize, nik) char *buf; char *V; int ndig; size_t bufsize; int nik; -#else -g_Qfmt_p(char *buf, void *V, int ndig, size_t bufsize, int nik) -#endif -{ - static const FPI fpi0 = { 113, 1-16383-113+1, 32766 - 16383 - 113 + 1, 1, 0, Int_max }; - char *b, *s, *se; - ULong bits[4], *L, sign; - int decpt, ex, i, mode; -#ifdef Honor_FLT_ROUNDS -#include "third_party/gdtoa/gdtoa_fltrnds.inc" -#else -#define fpi &fpi0 -#endif - - if (ndig < 0) - ndig = 0; - if (bufsize < (size_t)(ndig + 10)) - return 0; - - L = (ULong*)V; - sign = L[_0] & 0x80000000L; - bits[3] = L[_0] & 0xffff; - bits[2] = L[_1]; - bits[1] = L[_2]; - bits[0] = L[_3]; - b = buf; - if ( (ex = (L[_0] & 0x7fff0000L) >> 16) !=0) { - if (ex == 0x7fff) { - /* Infinity or NaN */ - if (nik < 0 || nik > 35) - nik = 0; - if (bits[0] | bits[1] | bits[2] | bits[3]) { - if (sign && nik < 18) - *b++ = '-'; - b = strcp(b, NanName[nik%3]); - if (nik > 5 && (nik < 12 - || bits[0] != NanDflt_Q_D2A[0] - || bits[1] != NanDflt_Q_D2A[1] - || bits[2] != NanDflt_Q_D2A[2] - || (bits[2] ^ NanDflt_Q_D2A[2]) & 0xffff)) - b = add_nanbits(b, bufsize - (b-buf), bits, 4); - } - else { - b = buf; - if (sign) - *b++ = '-'; - b = strcp(b, InfName[nik%6]); - } - return b; - } - i = STRTOG_Normal; - bits[3] |= 0x10000; - } - else if (bits[0] | bits[1] | bits[2] | bits[3]) { - i = STRTOG_Denormal; - ex = 1; - } - else { -#ifndef IGNORE_ZERO_SIGN - if (sign) - *b++ = '-'; -#endif - *b++ = '0'; - *b = 0; - return b; - } - ex -= 0x3fff + 112; - mode = 2; - if (ndig <= 0) { - if (bufsize < 48) - return 0; - mode = 0; - } - s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se); - return g__fmt(buf, s, 
se, decpt, sign, bufsize); - } diff --git a/third_party/gdtoa/g__fmt.c b/third_party/gdtoa/g__fmt.c index c1dbb7ef..65cc693d 100644 --- a/third_party/gdtoa/g__fmt.c +++ b/third_party/gdtoa/g__fmt.c @@ -48,20 +48,16 @@ THIS SOFTWARE. #define ldus_QNAN4 0 #endif - const char *const InfName[6] = { "Infinity", "infinity", "INFINITY", "Inf", "inf", "INF" }; - const char *const NanName[3] = { "NaN", "nan", "NAN" }; - const ULong NanDflt_Q_D2A[4] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }; - const ULong NanDflt_d_D2A[2] = { d_QNAN1, d_QNAN0 }; - const ULong NanDflt_f_D2A[1] = { f_QNAN }; - const ULong NanDflt_xL_D2A[3] = { 1, 0x80000000, 0x7fff0000 }; - const UShort NanDflt_ldus_D2A[5] = { ldus_QNAN4, ldus_QNAN3, ldus_QNAN2, ldus_QNAN1, ldus_QNAN0 }; +const char *const InfName[6] = { "Infinity", "infinity", "INFINITY", "Inf", "inf", "INF" }; +const char *const NanName[3] = { "NaN", "nan", "NAN" }; +const ULong __gdtoa_NanDflt_Q[4] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }; +const ULong __gdtoa_NanDflt_d[2] = { d_QNAN1, d_QNAN0 }; +const ULong __gdtoa_NanDflt_f[1] = { f_QNAN }; +const ULong __gdtoa_NanDflt_xL[3] = { 1, 0x80000000, 0x7fff0000 }; +const UShort __gdtoa_NanDflt_ldus[5] = { ldus_QNAN4, ldus_QNAN3, ldus_QNAN2, ldus_QNAN1, ldus_QNAN0 }; - char * -#ifdef KR_headers -g__fmt(b, s, se, decpt, sign, blen) char *b; char *s; char *se; int decpt; ULong sign; size_t blen; -#else +char * g__fmt(char *b, char *s, char *se, int decpt, ULong sign, size_t blen) -#endif { int i, j, k; char *be, *s0; @@ -168,7 +164,7 @@ g__fmt(char *b, char *s, char *se, int decpt, ULong sign, size_t blen) } char * -add_nanbits_D2A(char *b, size_t blen, ULong *bits, int nb) +__gdtoa_add_nanbits(char *b, size_t blen, ULong *bits, int nb) { ULong t; char *rv; diff --git a/third_party/gdtoa/g_ddfmt.c b/third_party/gdtoa/g_ddfmt.c index ddb96599..54731668 100644 --- a/third_party/gdtoa/g_ddfmt.c +++ b/third_party/gdtoa/g_ddfmt.c @@ -31,12 +31,8 @@ THIS SOFTWARE. 
/* Please send bug reports to David M. Gay (dmg@acm.org). */ - char * -#ifdef KR_headers -g_ddfmt(buf, dd0, ndig, bufsize) char *buf; double *dd0; int ndig; size_t bufsize; -#else +char * g_ddfmt(char *buf, double *dd0, int ndig, size_t bufsize) -#endif { FPI fpi; char *b, *s, *se; diff --git a/third_party/gdtoa/g_ddfmt_p.c b/third_party/gdtoa/g_ddfmt_p.c index 59249648..4e294d3f 100644 --- a/third_party/gdtoa/g_ddfmt_p.c +++ b/third_party/gdtoa/g_ddfmt_p.c @@ -31,14 +31,10 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg@acm.org). */ - extern ULong NanDflt_d_D2A[2]; +extern ULong __gdtoa_NanDflt_d[2]; - char * -#ifdef KR_headers -g_ddfmt_p(buf, dd0, ndig, bufsize, nik) char *buf; double *dd0; int ndig; size_t bufsize; int nik; -#else +char * g_ddfmt_p(char *buf, double *dd0, int ndig, size_t bufsize, int nik) -#endif { FPI fpi; char *b, *s, *se; @@ -82,10 +78,10 @@ g_ddfmt_p(char *buf, double *dd0, int ndig, size_t bufsize, int nik) *b++ = '-'; b = strcp(b, NanName[nik%3]); if (nik > 5 && (nik < 12 - || L[_1] != NanDflt_d_D2A[0] - || (L[_0] ^ NanDflt_d_D2A[1]) & 0xfffff - || L[2+_1] != NanDflt_d_D2A[0] - || (L[2+_0] ^ NanDflt_d_D2A[1]) & 0xfffff)) { + || L[_1] != __gdtoa_NanDflt_d[0] + || (L[_0] ^ __gdtoa_NanDflt_d[1]) & 0xfffff + || L[2+_1] != __gdtoa_NanDflt_d[0] + || (L[2+_0] ^ __gdtoa_NanDflt_d[1]) & 0xfffff)) { bits0[0] = L[2+_1]; bits0[1] = (L[2+_0] & 0xfffff) | (L[_1] << 20); bits0[2] = (L[_1] >> 12) | (L[_0] << 20); diff --git a/third_party/gdtoa/g_dfmt.c b/third_party/gdtoa/g_dfmt.c index 6ce02bfc..70d51835 100644 --- a/third_party/gdtoa/g_dfmt.c +++ b/third_party/gdtoa/g_dfmt.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - char* -#ifdef KR_headers -g_dfmt(buf, d, ndig, bufsize) char *buf; double *d; int ndig; size_t bufsize; -#else +char* g_dfmt(char *buf, double *d, int ndig, size_t bufsize) -#endif { static const FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, 0, Int_max }; char *b, *s, *se; diff --git a/third_party/gdtoa/g_dfmt_p.c b/third_party/gdtoa/g_dfmt_p.c index 6842019c..1cf5c89d 100644 --- a/third_party/gdtoa/g_dfmt_p.c +++ b/third_party/gdtoa/g_dfmt_p.c @@ -32,14 +32,10 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - extern ULong NanDflt_d_D2A[2]; +extern ULong __gdtoa_NanDflt_d[2]; - char* -#ifdef KR_headers -g_dfmt_p(buf, d, ndig, bufsize, nik) char *buf; double *d; int ndig; size_t bufsize; int nik; -#else +char* g_dfmt_p(char *buf, double *d, int ndig, size_t bufsize, int nik) -#endif { static const FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, 0, Int_max }; char *b, *s, *se; @@ -70,8 +66,8 @@ g_dfmt_p(char *buf, double *d, int ndig, size_t bufsize, int nik) *b++ = '-'; b = strcp(b, NanName[nik%3]); if (nik > 5 && (nik < 12 - || bits[0] != NanDflt_d_D2A[0] - || (bits[1] ^ NanDflt_d_D2A[1]) & 0xfffff)) { + || bits[0] != __gdtoa_NanDflt_d[0] + || (bits[1] ^ __gdtoa_NanDflt_d[1]) & 0xfffff)) { bits[0] = L[_1]; bits[1] = L[_0] & 0xfffff; b = add_nanbits(b, bufsize - (b-buf), bits, 2); diff --git a/third_party/gdtoa/g_ffmt.c b/third_party/gdtoa/g_ffmt.c index 8270c748..f91401f3 100644 --- a/third_party/gdtoa/g_ffmt.c +++ b/third_party/gdtoa/g_ffmt.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - char* -#ifdef KR_headers -g_ffmt(buf, f, ndig, bufsize) char *buf; float *f; int ndig; size_t bufsize; -#else +char* g_ffmt(char *buf, float *f, int ndig, size_t bufsize) -#endif { static const FPI fpi0 = { 24, 1-127-24+1, 254-127-24+1, 1, 0, 6 }; char *b, *s, *se; diff --git a/third_party/gdtoa/g_ffmt_p.c b/third_party/gdtoa/g_ffmt_p.c index 8b9145d3..0ba53cbe 100644 --- a/third_party/gdtoa/g_ffmt_p.c +++ b/third_party/gdtoa/g_ffmt_p.c @@ -32,14 +32,10 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - extern ULong NanDflt_f_D2A[1]; +extern ULong __gdtoa_NanDflt_f[1]; - char* -#ifdef KR_headers -g_ffmt_p(buf, f, ndig, bufsize, nik) char *buf; float *f; int ndig; size_t bufsize; int nik; -#else +char* g_ffmt_p(char *buf, float *f, int ndig, size_t bufsize, int nik) -#endif { static const FPI fpi0 = { 24, 1-127-24+1, 254-127-24+1, 1, 0, 6 }; char *b, *s, *se; @@ -68,7 +64,7 @@ g_ffmt_p(char *buf, float *f, int ndig, size_t bufsize, int nik) *b++ = '-'; b = strcp(b, NanName[nik%3]); if (nik > 5 && (nik < 12 - || (bits[0] ^ NanDflt_f_D2A[0]) & 0x7fffff)) + || (bits[0] ^ __gdtoa_NanDflt_f[0]) & 0x7fffff)) b = add_nanbits(b, bufsize - (b-buf), bits, 1); return b; } diff --git a/third_party/gdtoa/g_xLfmt.c b/third_party/gdtoa/g_xLfmt.c deleted file mode 100644 index 1e6d8f7d..00000000 --- a/third_party/gdtoa/g_xLfmt.c +++ /dev/null @@ -1,114 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. 
- -Copyright (C) 1998 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#endif -#ifdef IEEE_8087 -#define _0 2 -#define _1 1 -#define _2 0 -#endif - - char* -#ifdef KR_headers -g_xLfmt(buf, V, ndig, bufsize) char *buf; char *V; int ndig; size_t bufsize; -#else -g_xLfmt(char *buf, void *V, int ndig, size_t bufsize) -#endif -{ - static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, Int_max }; - char *b, *s, *se; - ULong bits[2], *L, sign; - int decpt, ex, i, mode; -#ifdef Honor_FLT_ROUNDS -#include "third_party/gdtoa/gdtoa_fltrnds.inc" -#else -#define fpi &fpi0 -#endif - - if (ndig < 0) - ndig = 0; - if (bufsize < (size_t)(ndig + 10)) - return 0; - - L = (ULong*)V; - sign = L[_0] & 0x80000000L; - bits[1] = L[_1]; - bits[0] = L[_2]; - if ( (ex = (L[_0] >> 16) & 0x7fff) !=0) { - if (ex == 0x7fff) { - /* Infinity or NaN */ - if (bits[0] | bits[1]) - b = strcp(buf, "NaN"); - else { - b = buf; - if (sign) - *b++ = '-'; - b = strcp(b, "Infinity"); - } - return b; - } - i = STRTOG_Normal; - } - else if (bits[0] | bits[1]) { - i = STRTOG_Denormal; - } - else { - b = buf; -#ifndef IGNORE_ZERO_SIGN - if (sign) - *b++ = '-'; -#endif - *b++ = '0'; - *b = 0; - return b; - } - ex -= 0x3fff + 63; - mode = 2; - if (ndig <= 0) { - if (bufsize < 32) - return 0; - mode = 0; - } - s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se); - return g__fmt(buf, s, se, decpt, sign, bufsize); - } diff --git a/third_party/gdtoa/g_xLfmt_p.c b/third_party/gdtoa/g_xLfmt_p.c deleted file mode 100644 index 123fefd7..00000000 --- a/third_party/gdtoa/g_xLfmt_p.c +++ /dev/null @@ -1,126 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. 
- -Copyright (C) 1998 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - - extern ULong NanDflt_xL_D2A[3]; - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#endif -#ifdef IEEE_8087 -#define _0 2 -#define _1 1 -#define _2 0 -#endif - - char* -#ifdef KR_headers -g_xLfmt_p(buf, V, ndig, bufsize, nik) char *buf; char *V; int ndig; size_t bufsize; int nik; -#else -g_xLfmt_p(char *buf, void *V, int ndig, size_t bufsize, int nik) -#endif -{ - static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, Int_max }; - char *b, *s, *se; - ULong bits[2], *L, sign; - int decpt, ex, i, mode; -#ifdef Honor_FLT_ROUNDS -#include "third_party/gdtoa/gdtoa_fltrnds.inc" -#else -#define fpi &fpi0 -#endif - - if (ndig < 0) - ndig = 0; - if (bufsize < (size_t)(ndig + 10)) - return 0; - - L = (ULong*)V; - sign = L[_0] & 0x80000000L; - bits[1] = L[_1]; - bits[0] = L[_2]; - if ( (ex = (L[_0] >> 16) & 0x7fff) !=0) { - if (ex == 0x7fff) { - /* Infinity or NaN */ - if (nik < 0 || nik > 35) - nik = 0; - if (!bits[0] && bits[1] == 0x80000000) { - b = buf; - if (sign) - *b++ = '-'; - b = strcp(b, InfName[nik%6]); - } - else { - b = buf; - if (sign && nik < 18) - *b++ = '-'; - b = strcp(b, NanName[nik%3]); - if (nik > 5 && (nik < 12 - || bits[0] != NanDflt_xL_D2A[0] - || bits[1] != NanDflt_xL_D2A[1])) - b = add_nanbits(b, bufsize - (b-buf), bits, 2); - } - return b; - } - i = STRTOG_Normal; - } - else if (bits[0] | bits[1]) { - i = STRTOG_Denormal; - } - else { - b = buf; -#ifndef IGNORE_ZERO_SIGN - if (sign) - *b++ = '-'; -#endif - *b++ = '0'; - *b = 0; - return b; - } - ex -= 0x3fff + 63; - mode = 2; - if (ndig <= 0) { - if (bufsize < 32) - return 0; - mode = 0; - } - s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se); - return g__fmt(buf, s, se, decpt, sign, bufsize); - } diff --git a/third_party/gdtoa/g_xfmt.c b/third_party/gdtoa/g_xfmt.c index ae172c50..b6b1b1e2 100644 --- a/third_party/gdtoa/g_xfmt.c +++ b/third_party/gdtoa/g_xfmt.c 
@@ -52,12 +52,8 @@ THIS SOFTWARE. #define _4 0 #endif - char* -#ifdef KR_headers -g_xfmt(buf, V, ndig, bufsize) char *buf; char *V; int ndig; size_t bufsize; -#else +char* g_xfmt(char *buf, void *V, int ndig, size_t bufsize) -#endif { static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, Int_max }; char *b, *s, *se; diff --git a/third_party/gdtoa/g_xfmt_p.c b/third_party/gdtoa/g_xfmt_p.c index f0954139..29ee7ecb 100644 --- a/third_party/gdtoa/g_xfmt_p.c +++ b/third_party/gdtoa/g_xfmt_p.c @@ -32,7 +32,7 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - extern UShort NanDflt_ldus_D2A[5]; +extern UShort __gdtoa_NanDflt_ldus[5]; #undef _0 #undef _1 @@ -54,12 +54,8 @@ THIS SOFTWARE. #define _4 0 #endif - char* -#ifdef KR_headers -g_xfmt_p(buf, V, ndig, bufsize, nik) char *buf; char *V; int ndig; size_t bufsize; int nik; -#else +char* g_xfmt_p(char *buf, void *V, int ndig, size_t bufsize, int nik) -#endif { static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, Int_max }; char *b, *s, *se; @@ -98,10 +94,10 @@ g_xfmt_p(char *buf, void *V, int ndig, size_t bufsize, int nik) *b++ = '-'; b = strcp(b, NanName[nik%3]); if (nik > 5 && (nik < 12 - || L[_1] != NanDflt_ldus_D2A[3] - || L[_2] != NanDflt_ldus_D2A[2] - || L[_3] != NanDflt_ldus_D2A[1] - || L[_4] != NanDflt_ldus_D2A[0])) { + || L[_1] != __gdtoa_NanDflt_ldus[3] + || L[_2] != __gdtoa_NanDflt_ldus[2] + || L[_3] != __gdtoa_NanDflt_ldus[1] + || L[_4] != __gdtoa_NanDflt_ldus[0])) { bits[1] &= 0x7fffffff; b = add_nanbits(b, bufsize - (b-buf), bits, 2); } diff --git a/third_party/gdtoa/gdtoa.c b/third_party/gdtoa/gdtoa.c index 6a4b7a96..70d2573e 100644 --- a/third_party/gdtoa/gdtoa.c +++ b/third_party/gdtoa/gdtoa.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - static Bigint * -#ifdef KR_headers -bitstob(bits, nbits, bbits MTa) ULong *bits; int nbits; int *bbits; MTk -#else +static Bigint * bitstob(ULong *bits, int nbits, int *bbits MTd) -#endif { int i, k; Bigint *b; @@ -109,15 +105,8 @@ bitstob(ULong *bits, int nbits, int *bbits MTd) * calculation. */ - char * -gdtoa -#ifdef KR_headers - (fpi, be, bits, kindp, mode, ndigits, decpt, rve) - CONST FPI *fpi; int be; ULong *bits; - int *kindp, mode, ndigits, *decpt; char **rve; -#else - (CONST FPI *fpi, int be, ULong *bits, int *kindp, int mode, int ndigits, int *decpt, char **rve) -#endif +char * +gdtoa(CONST FPI *fpi, int be, ULong *bits, int *kindp, int mode, int ndigits, int *decpt, char **rve) { /* Arguments ndigits and decpt are similar to the second and third arguments of ecvt and fcvt; trailing zeros are suppressed from diff --git a/third_party/gdtoa/gdtoa.internal.h b/third_party/gdtoa/gdtoa.internal.h index 6d88abe3..2b587bfa 100644 --- a/third_party/gdtoa/gdtoa.internal.h +++ b/third_party/gdtoa/gdtoa.internal.h @@ -137,7 +137,6 @@ THIS SOFTWARE. * something other than "long long", #define Llong to be the name, * and if "unsigned Llong" does not work as an unsigned version of * Llong, #define #ULLong to be the corresponding unsigned type. - * #define KR_headers for old-style C function headers. * #define Bad_float_h if your system lacks a float.h or if it does not * define some or all of DBL_DIG, DBL_MAX_10_EXP, DBL_MAX_EXP, * FLT_RADIX, FLT_ROUNDS, and DBL_MAX. @@ -220,16 +219,6 @@ THIS SOFTWARE. * #define USE_LOCALE to use the current locale's decimal_point value. 
*/ -#ifndef ANSI -#ifdef KR_headers -#define ANSI(x) () -#define Void /*nothing*/ -#else -#define ANSI(x) x -#define Void void -#endif -#endif /* ANSI */ - #ifndef Long #define Long int #endif @@ -241,11 +230,7 @@ typedef unsigned short UShort; #endif #ifndef CONST -#ifdef KR_headers -#define CONST /* blank */ -#else #define CONST const -#endif #endif /* CONST */ #ifdef DEBUG @@ -256,20 +241,20 @@ typedef unsigned short UShort; } #endif -#ifdef KR_headers -#define Char char -#else +/* #ifdef KR_headers */ +/* #define Char char */ +/* #else */ #define Char void -#endif +/* #endif */ #ifdef MALLOC -extern Char *MALLOC ANSI((size_t)); +extern Char *MALLOC(size_t); #else #define MALLOC malloc #endif #ifdef REALLOC -extern Char *REALLOC ANSI((Char *, size_t)); +extern Char *REALLOC(Char *, size_t); #else #define REALLOC realloc #endif @@ -476,11 +461,7 @@ Exactly one of IEEE_8087, IEEE_MC68k, VAX, or IBM should be defined. #ifdef RND_PRODQUOT #define rounded_product(a, b) a = rnd_prod(a, b) #define rounded_quotient(a, b) a = rnd_quot(a, b) -#ifdef KR_headers -extern double rnd_prod(), rnd_quot(); -#else extern double rnd_prod(double, double), rnd_quot(double, double); -#endif #else #define rounded_product(a, b) a *= b #define rounded_quotient(a, b) a /= b @@ -531,9 +512,9 @@ extern double rnd_prod(double, double), rnd_quot(double, double); #define MTb , &TI #define MTd , ThInfo **PTI #define MTk ThInfo **PTI; -extern void ACQUIRE_DTOA_LOCK ANSI((unsigned int)); -extern void FREE_DTOA_LOCK ANSI((unsigned int)); -extern unsigned int dtoa_get_threadno ANSI((void)); +extern void ACQUIRE_DTOA_LOCK(unsigned int); +extern void FREE_DTOA_LOCK(unsigned int); +extern unsigned int dtoa_get_threadno(void); #else /*}{*/ #define ACQUIRE_DTOA_LOCK(n) /*nothing*/ #define FREE_DTOA_LOCK(n) /*nothing*/ @@ -562,111 +543,114 @@ typedef struct ThInfo { #ifdef DECLARE_SIZE_T typedef unsigned int size_t; #endif -extern void memcpy_D2A ANSI((void *, const void *, size_t)); +extern void 
__gdtoa_memcpy(void *, const void *, size_t); #define Bcopy(x, y) \ - memcpy_D2A(&x->sign, &y->sign, y->wds * sizeof(ULong) + 2 * sizeof(int)) + __gdtoa_memcpy(&x->sign, &y->sign, y->wds * sizeof(ULong) + 2 * sizeof(int)) #else /* !NO_STRING_H */ #define Bcopy(x, y) \ memcpy(&x->sign, &y->sign, y->wds * sizeof(ULong) + 2 * sizeof(int)) #endif /* NO_STRING_H */ -#define Balloc Balloc_D2A -#define Bfree Bfree_D2A -#define InfName InfName_D2A -#define NanName NanName_D2A -#define ULtoQ ULtoQ_D2A -#define ULtof ULtof_D2A -#define ULtod ULtod_D2A -#define ULtodd ULtodd_D2A -#define ULtox ULtox_D2A -#define ULtoxL ULtoxL_D2A -#define add_nanbits add_nanbits_D2A -#define any_on any_on_D2A -#define b2d b2d_D2A -#define bigtens bigtens_D2A -#define cmp cmp_D2A -#define copybits copybits_D2A -#define d2b d2b_D2A -#define decrement decrement_D2A -#define diff diff_D2A -#define dtoa_result dtoa_result_D2A -#define g__fmt g__fmt_D2A -#define gethex gethex_D2A -#define hexdig hexdig_D2A -#define hexnan hexnan_D2A -#define hi0bits(x) hi0bits_D2A((ULong)(x)) -#define i2b i2b_D2A -#define increment increment_D2A -#define lo0bits lo0bits_D2A -#define lshift lshift_D2A -#define match match_D2A -#define mult mult_D2A -#define multadd multadd_D2A -#define nrv_alloc nrv_alloc_D2A -#define pow5mult pow5mult_D2A -#define quorem quorem_D2A -#define ratio ratio_D2A -#define rshift rshift_D2A -#define rv_alloc rv_alloc_D2A -#define s2b s2b_D2A -#define set_ones set_ones_D2A -#define strcp strcp_D2A -#define strtoIg strtoIg_D2A -#define sum sum_D2A -#define tens tens_D2A -#define tinytens tinytens_D2A -#define tinytens tinytens_D2A -#define trailz trailz_D2A -#define ulp ulp_D2A +#define Balloc __gdtoa_Balloc +#define Bfree __gdtoa_Bfree +#define InfName __gdtoa_InfName +#define NanName __gdtoa_NanName +#define ULtoQ __gdtoa_ULtoQ +#define ULtof __gdtoa_ULtof +#define ULtod __gdtoa_ULtod +#define ULtodd __gdtoa_ULtodd +#define ULtox __gdtoa_ULtox +#define ULtoxL __gdtoa_ULtoxL +#define 
add_nanbits __gdtoa_add_nanbits +#define any_on __gdtoa_any_on +#define b2d __gdtoa_b2d +#define bigtens __gdtoa_bigtens +#define cmp __gdtoa_cmp +#define copybits __gdtoa_copybits +#define d2b __gdtoa_d2b +#define decrement __gdtoa_decrement +#define diff __gdtoa_diff +#define dtoa_result __gdtoa_dtoa_result +#define g__fmt __gdtoa_g__fmt +#define gethex __gdtoa_gethex +#define hexdig __gdtoa_hexdig +#define hexnan __gdtoa_hexnan +#define hi0bits(x) __gdtoa_hi0bits((ULong)(x)) +#define i2b __gdtoa_i2b +#define increment __gdtoa_increment +#define lo0bits __gdtoa_lo0bits +#define lshift __gdtoa_lshift +#define match __gdtoa_match +#define mult __gdtoa_mult +#define multadd __gdtoa_multadd +#define nrv_alloc __gdtoa_nrv_alloc +#define pow5mult __gdtoa_pow5mult +#define quorem __gdtoa_quorem +#define ratio __gdtoa_ratio +#define rshift __gdtoa_rshift +#define rv_alloc __gdtoa_rv_alloc +#define s2b __gdtoa_s2b +#define set_ones __gdtoa_set_ones +#define strcp __gdtoa_strcp +#define strtoIg __gdtoa_strtoIg +#define sum __gdtoa_sum +#define tens __gdtoa_tens +#define tinytens __gdtoa_tinytens +#define tinytens __gdtoa_tinytens +#define trailz __gdtoa_trailz +#define ulp __gdtoa_ulp -extern char *add_nanbits ANSI((char *, size_t, ULong *, int)); -extern char *dtoa_result; -extern CONST double bigtens[], tens[], tinytens[]; -extern const unsigned char hexdig[]; -extern const char *const InfName[6], *const NanName[3]; +extern char *add_nanbits(char *, size_t, ULong *, int); -extern Bigint *Balloc ANSI((int MTd)); -extern void Bfree ANSI((Bigint * MTd)); -extern void ULtof ANSI((ULong *, ULong *, Long, int)); -extern void ULtod ANSI((ULong *, ULong *, Long, int)); -extern void ULtodd ANSI((ULong *, ULong *, Long, int)); -extern void ULtoQ ANSI((ULong *, ULong *, Long, int)); -extern void ULtox ANSI((UShort *, ULong *, Long, int)); -extern void ULtoxL ANSI((ULong *, ULong *, Long, int)); -extern ULong any_on ANSI((Bigint *, int)); -extern double b2d ANSI((Bigint *, int *)); 
-extern int cmp ANSI((Bigint *, Bigint *)); -extern void copybits ANSI((ULong *, int, Bigint *)); -extern Bigint *d2b ANSI((double, int *, int *MTd)); -extern void decrement ANSI((Bigint *)); -extern Bigint *diff ANSI((Bigint *, Bigint *MTd)); -extern char *g__fmt ANSI((char *, char *, char *, int, ULong, size_t)); -extern int gethex ANSI((CONST char **, CONST FPI *, Long *, Bigint **, - int MTd)); -extern void hexdig_init_D2A(Void); -extern int hexnan ANSI((CONST char **, CONST FPI *, ULong *)); -extern int hi0bits_D2A ANSI((ULong)); -extern Bigint *i2b ANSI((int MTd)); -extern Bigint *increment ANSI((Bigint * MTd)); -extern int lo0bits ANSI((ULong *)); -extern Bigint *lshift ANSI((Bigint *, int MTd)); -extern int match ANSI((CONST char **, char *)); -extern Bigint *mult ANSI((Bigint *, Bigint *MTd)); -extern Bigint *multadd ANSI((Bigint *, int, int MTd)); -extern char *nrv_alloc ANSI((char *, char **, int MTd)); -extern Bigint *pow5mult ANSI((Bigint *, int MTd)); -extern int quorem ANSI((Bigint *, Bigint *)); -extern double ratio ANSI((Bigint *, Bigint *)); -extern void rshift ANSI((Bigint *, int)); -extern char *rv_alloc ANSI((int MTd)); -extern Bigint *s2b ANSI((CONST char *, int, int, ULong, int MTd)); -extern Bigint *set_ones ANSI((Bigint *, int MTd)); -extern char *strcp ANSI((char *, const char *)); -extern int strtoIg ANSI((CONST char *, char **, CONST FPI *, Long *, Bigint **, - int *)); -extern Bigint *sum ANSI((Bigint *, Bigint *MTd)); -extern int trailz ANSI((Bigint *)); -extern double ulp ANSI((U *)); +hidden extern char *dtoa_result; +hidden extern CONST double bigtens[]; +hidden extern CONST double tens[]; +hidden extern CONST double tinytens[]; +hidden extern const unsigned char hexdig[]; +hidden extern const char *const InfName[6]; +hidden extern const char *const NanName[3]; + +extern Bigint *Balloc(int MTd); +extern void Bfree(Bigint *MTd); +extern void ULtof(ULong *, ULong *, Long, int); +extern void ULtod(ULong *, ULong *, Long, int); +extern 
void ULtodd(ULong *, ULong *, Long, int); +extern void ULtoQ(ULong *, ULong *, Long, int); +extern void ULtox(UShort *, ULong *, Long, int); +extern void ULtoxL(ULong *, ULong *, Long, int); +extern ULong any_on(Bigint *, int); +extern double b2d(Bigint *, int *); +extern int cmp(Bigint *, Bigint *); +extern void copybits(ULong *, int, Bigint *); +extern Bigint *d2b(double, int *, int *MTd); +extern void decrement(Bigint *); +extern Bigint *diff(Bigint *, Bigint *MTd); +extern char *g__fmt(char *, char *, char *, int, ULong, size_t); +extern int gethex(CONST char **, CONST FPI *, Long *, Bigint **, int MTd); +extern void __gdtoa_hexdig_init(void); +extern int hexnan(CONST char **, CONST FPI *, ULong *); +extern int __gdtoa_hi0bits(ULong); +extern Bigint *i2b(int MTd); +extern Bigint *increment(Bigint *MTd); +extern int lo0bits(ULong *); +extern Bigint *lshift(Bigint *, int MTd); +extern int match(CONST char **, char *); +extern Bigint *mult(Bigint *, Bigint *MTd); +extern Bigint *multadd(Bigint *, int, int MTd); +extern char *nrv_alloc(char *, char **, int MTd); +extern Bigint *pow5mult(Bigint *, int MTd); +extern int quorem(Bigint *, Bigint *); +extern double ratio(Bigint *, Bigint *); +extern void rshift(Bigint *, int); +extern char *rv_alloc(int MTd); +extern Bigint *s2b(CONST char *, int, int, ULong, int MTd); +extern Bigint *set_ones(Bigint *, int MTd); +extern char *strcp(char *, const char *); +extern int strtoIg(CONST char *, char **, CONST FPI *, Long *, Bigint **, + int *); +extern Bigint *sum(Bigint *, Bigint *MTd); +extern int trailz(Bigint *); +extern double ulp(U *); #ifdef __cplusplus } diff --git a/third_party/gdtoa/gethex.c b/third_party/gdtoa/gethex.c index 9a007768..c9dcb2c1 100644 --- a/third_party/gdtoa/gethex.c +++ b/third_party/gdtoa/gethex.c @@ -33,13 +33,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - int -#ifdef KR_headers -gethex(sp, fpi, exp, bp, sign MTa) - CONST char **sp; CONST FPI *fpi; Long *exp; Bigint **bp; int sign; MTk -#else +int gethex( CONST char **sp, CONST FPI *fpi, Long *exp, Bigint **bp, int sign MTd) -#endif { Bigint *b; CONST unsigned char *decpt, *s0, *s, *s1; @@ -64,7 +59,7 @@ gethex( CONST char **sp, CONST FPI *fpi, Long *exp, Bigint **bp, int sign MTd) #endif #endif - /**** if (!hexdig['0']) hexdig_init_D2A(); ****/ + /**** if (!hexdig['0']) __gdtoa_hexdig_init(); ****/ *bp = 0; havedig = 0; s0 = *(CONST unsigned char **)sp + 2; diff --git a/third_party/gdtoa/gmisc.c b/third_party/gdtoa/gmisc.c index 23fa478f..1c3b692a 100644 --- a/third_party/gdtoa/gmisc.c +++ b/third_party/gdtoa/gmisc.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - void -#ifdef KR_headers -rshift(b, k) Bigint *b; int k; -#else +void rshift(Bigint *b, int k) -#endif { ULong *x, *x1, *xe, y; int n; @@ -65,12 +61,8 @@ rshift(Bigint *b, int k) b->x[0] = 0; } - int -#ifdef KR_headers -trailz(b) Bigint *b; -#else +int trailz(Bigint *b) -#endif { ULong L, *x, *xe; int n = 0; diff --git a/third_party/gdtoa/hd_init.c b/third_party/gdtoa/hd_init.c index e3888aeb..161e85f9 100644 --- a/third_party/gdtoa/hd_init.c +++ b/third_party/gdtoa/hd_init.c @@ -33,23 +33,19 @@ THIS SOFTWARE. * with " at " changed at "@" and " dot " changed to "."). */ #if 0 - unsigned char hexdig[256]; +unsigned char hexdig[256]; - static void -#ifdef KR_headers -htinit(h, s, inc) unsigned char *h; unsigned char *s; int inc; -#else +static void htinit(unsigned char *h, unsigned char *s, int inc) -#endif { int i, j; for(i = 0; (j = s[i]) !=0; i++) h[j] = i + inc; } - void -hexdig_init_D2A(Void) /* Use of hexdig_init omitted 20121220 to avoid a */ - /* race condition when multiple threads are used. 
*/ +void +__gdtoa_hexdig_init(Void) /* Use of hexdig_init omitted 20121220 to avoid a */ + /* race condition when multiple threads are used. */ { #define USC (unsigned char *) htinit(hexdig, USC "0123456789", 0x10); @@ -57,7 +53,7 @@ hexdig_init_D2A(Void) /* Use of hexdig_init omitted 20121220 to avoid a */ htinit(hexdig, USC "ABCDEF", 0x10 + 10); } #else - const unsigned char hexdig[256] = { +const unsigned char hexdig[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/third_party/gdtoa/hexnan.c b/third_party/gdtoa/hexnan.c index 430dc0a7..422999dd 100644 --- a/third_party/gdtoa/hexnan.c +++ b/third_party/gdtoa/hexnan.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - static void -#ifdef KR_headers -L_shift(x, x1, i) ULong *x; ULong *x1; int i; -#else +static void L_shift(ULong *x, ULong *x1, int i) -#endif { int j; @@ -50,19 +46,14 @@ L_shift(ULong *x, ULong *x1, int i) } while(++x < x1); } - int -#ifdef KR_headers -hexnan(sp, fpi, x0) - CONST char **sp; CONST FPI *fpi; ULong *x0; -#else +int hexnan( CONST char **sp, CONST FPI *fpi, ULong *x0) -#endif { ULong c, h, *x, *x1, *xe; CONST char *s; int havedig, hd0, i, nbits; - /**** if (!hexdig['0']) hexdig_init_D2A(); ****/ + /**** if (!hexdig['0']) __gdtoa_hexdig_init(); ****/ nbits = fpi->nbits; x = x0 + (nbits >> kshift); if (nbits & kmask) diff --git a/third_party/gdtoa/misc.c b/third_party/gdtoa/misc.c index afc325a1..debea409 100644 --- a/third_party/gdtoa/misc.c +++ b/third_party/gdtoa/misc.c @@ -90,13 +90,8 @@ get_TI(void) #define p5s TI0.P5s #endif /*}}*/ - Bigint * -Balloc -#ifdef KR_headers - (k MTa) int k; MTk -#else - (int k MTd) -#endif +Bigint * +Balloc(int k MTd) { int x; Bigint *rv; @@ -152,13 +147,8 @@ Balloc return rv; } - void -Bfree -#ifdef KR_headers - (v MTa) Bigint *v; MTk -#else - (Bigint *v MTd) -#endif +void 
+Bfree(Bigint *v MTd) { #ifdef MULTIPLE_THREADS ThInfo *TI; @@ -187,13 +177,8 @@ Bfree } } - int -lo0bits -#ifdef KR_headers - (y) ULong *y; -#else - (ULong *y) -#endif +int +lo0bits(ULong *y) { int k; ULong x = *y; @@ -235,13 +220,8 @@ lo0bits return k; } - Bigint * -multadd -#ifdef KR_headers - (b, m, a MTa) Bigint *b; int m, a; MTk -#else - (Bigint *b, int m, int a MTd) /* multiply by m and add a */ -#endif +Bigint * +multadd(Bigint *b, int m, int a MTd) /* multiply by m and add a */ { int i, wds; #ifdef ULLong @@ -292,13 +272,8 @@ multadd return b; } - int -hi0bits_D2A -#ifdef KR_headers - (x) ULong x; -#else - (ULong x) -#endif +int +__gdtoa_hi0bits(ULong x) { int k = 0; @@ -326,13 +301,8 @@ hi0bits_D2A return k; } - Bigint * -i2b -#ifdef KR_headers - (i MTa) int i; MTk -#else - (int i MTd) -#endif +Bigint * +i2b(int i MTd) { Bigint *b; @@ -342,13 +312,8 @@ i2b return b; } - Bigint * -mult -#ifdef KR_headers - (a, b MTa) Bigint *a, *b; MTk -#else - (Bigint *a, Bigint *b MTd) -#endif +Bigint * +mult(Bigint *a, Bigint *b MTd) { Bigint *c; int k, wa, wb, wc; @@ -452,13 +417,8 @@ mult return c; } - Bigint * -pow5mult -#ifdef KR_headers - (b, k MTa) Bigint *b; int k; MTk -#else - (Bigint *b, int k MTd) -#endif +Bigint * +pow5mult(Bigint *b, int k MTd) { Bigint *b1, *p5, *p51; #ifdef MULTIPLE_THREADS @@ -524,13 +484,8 @@ pow5mult return b; } - Bigint * -lshift -#ifdef KR_headers - (b, k MTa) Bigint *b; int k; MTk -#else - (Bigint *b, int k MTd) -#endif +Bigint * +lshift(Bigint *b, int k MTd) { int i, k1, n, n1; Bigint *b1; @@ -578,13 +533,8 @@ lshift return b1; } - int -cmp -#ifdef KR_headers - (a, b) Bigint *a, *b; -#else - (Bigint *a, Bigint *b) -#endif +int +cmp(Bigint *a, Bigint *b) { ULong *xa, *xa0, *xb, *xb0; int i, j; @@ -612,13 +562,8 @@ cmp return 0; } - Bigint * -diff -#ifdef KR_headers - (a, b MTa) Bigint *a, *b; MTk -#else - (Bigint *a, Bigint *b MTd) -#endif +Bigint * +diff(Bigint *a, Bigint *b MTd) { Bigint *c; int i, wa, wb; @@ -706,13 +651,8 @@ diff 
return c; } - double -b2d -#ifdef KR_headers - (a, e) Bigint *a; int *e; -#else - (Bigint *a, int *e) -#endif +double +b2d(Bigint *a, int *e) { ULong *xa, *xa0, w, y, z; int k; @@ -775,13 +715,8 @@ b2d #undef d0 #undef d1 - Bigint * -d2b -#ifdef KR_headers - (dd, e, bits MTa) double dd; int *e, *bits; MTk -#else - (double dd, int *e, int *bits MTd) -#endif +Bigint * +d2b(double dd, int *e, int *bits MTd) { Bigint *b; U d; @@ -913,7 +848,7 @@ d2b #undef d0 #undef d1 - CONST double +CONST double #ifdef IEEE_Arith bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 }; CONST double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, 1e-256 @@ -928,7 +863,7 @@ CONST double tinytens[] = { 1e-16, 1e-32 }; #endif #endif - CONST double +CONST double tens[] = { 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, @@ -938,12 +873,8 @@ tens[] = { #endif }; - char * -#ifdef KR_headers -strcp_D2A(a, b) char *a; char *b; -#else -strcp_D2A(char *a, CONST char *b) -#endif +char * +__gdtoa_strcp(char *a, CONST char *b) { while((*a = *b++)) a++; @@ -952,12 +883,8 @@ strcp_D2A(char *a, CONST char *b) #ifdef NO_STRING_H - Char * -#ifdef KR_headers -memcpy_D2A(a, b, len) Char *a; Char *b; size_t len; -#else -memcpy_D2A(void *a1, void *b1, size_t len) -#endif +Char * +__gdtoa_memcpy(void *a1, void *b1, size_t len) { char *a = (char*)a1, *ae = a + len; char *b = (char*)b1, *a0 = a; diff --git a/third_party/gdtoa/printf.c.txt b/third_party/gdtoa/printf.c.txt deleted file mode 100644 index 80a39ff6..00000000 --- a/third_party/gdtoa/printf.c.txt +++ /dev/null @@ -1,10 +0,0 @@ - -/* clang-format off */ -#ifdef __sun -#define Use_GDTOA_Qtype -#else -#if defined(__i386) || defined(__x86_64) -#define Use_GDTOA_for_i386_long_double -#endif -#endif -#include "third_party/gdtoa/printf.c0" diff --git a/third_party/gdtoa/printf.c0 b/third_party/gdtoa/printf.c0 deleted file mode 100644 index 45c2a5a3..00000000 --- a/third_party/gdtoa/printf.c0 +++ /dev/null @@ 
-1,1635 +0,0 @@ -#include "third_party/gdtoa/stdio1.h" - -/**************************************************************** -Copyright (C) 1997, 1999, 2001 Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -****************************************************************/ - -/* This implements most of ANSI C's printf, fprintf, and sprintf, - * omitting L, with %.0g and %.0G giving the shortest decimal string - * that rounds to the number being converted, and with negative - * precisions allowed for %f. - */ - -#ifdef Use_GDTOA_for_i386_long_double /*{{*/ -#include "third_party/gdtoa/gdtoa.h" -#else /*}{*/ -#ifndef NO_PRINTF_A_FMT /*{*/ -#include "third_party/gdtoa/gdtoa.h" -#endif /*}*/ -#endif /*}}*/ - -#ifdef __i386 -#define NO_GDTOA_i386_Quad -#endif - -#ifdef Use_GDTOA_for_i386_long_double /*{*/ -#ifndef NO_GDTOA_i386_Quad /*{*/ -#define GDTOA_both -#define Use_GDTOA_Qtype -#ifdef __ICC__ /* or __INTEL_COMPILER__ or __INTEL_COMPILER ?? 
*/ -#define GDTOA_Qtype _Quad -#else /*{*/ -#ifdef __linux -#define GDTOA_Qtype __float128 -#else -#undef GDTOA_both -#undef Use_GDTOA_Qtype -#endif -#endif /*}*/ -#endif /*} NO_GDTOA_i386_Quad */ -#endif /*} Use_GDTOA_for_i386_long_double */ - -#ifdef Use_GDTOA_Qtype /*{*/ -#ifndef GDTOA_H_INCLUDED -#include "third_party/gdtoa/gdtoa.h" -#endif -#ifndef GDTOA_Qtype -#define GDTOA_Qtype long double -#endif -#endif /*}*/ - -#ifdef NO_PRINTF_A_FMT /*{{*/ -#define WANT_A_FMT(x) /*nothing*/ -#else /*}{*/ -#define WANT_A_FMT(x) x -#endif /*}}*/ - - typedef struct -Finfo { - union { FILE *cf; char *sf; } u; - char *ob0, *obe1; - size_t lastlen; - } Finfo; - - typedef char *(*pgdtoa) ANSI((CONST FPI*, int be, ULong *bits, int *kind, int mode, int ndigits, int *decpt, char **rve)); - - typedef struct -FPBits { - ULong bits[4]; /* sufficient for quad; modify if considering wider types */ - FPI *fpi; - pgdtoa gdtoa; - int sign; - int ex; /* exponent */ - int kind; - } FPBits; - - typedef union U -{ - double d; - long double ld; -#ifdef GDTOA_Qtype - GDTOA_Qtype Qd; -#endif - unsigned int ui[4]; - unsigned short us[5]; - } U; - - typedef char *(*Putfunc) ANSI((Finfo*, int*)); - typedef void (*Fpbits) ANSI((U*, FPBits*)); - -/* Would have preferred typedef void (*Fpbits)(va_list*, FPBits*) - * but gcc is buggy in this regard. 
- */ - -#ifdef Use_GDTOA_for_i386_long_double /*{*/ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#define _3 3 -#define _4 4 -#endif -#ifdef IEEE_8087 -#define _0 4 -#define _1 3 -#define _2 2 -#define _3 1 -#define _4 0 -#endif - - static void -xfpbits(U *u, FPBits *b) -{ - ULong *bits; - int ex, i; - static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, 0 /*not used*/ }; - - b->fpi = &fpi0; - b->gdtoa = gdtoa; - b->sign = u->us[_0] & 0x8000; - bits = b->bits; - bits[1] = (u->us[_1] << 16) | u->us[_2]; - bits[0] = (u->us[_3] << 16) | u->us[_4]; - if ( (ex = u->us[_0] & 0x7fff) !=0) { - i = STRTOG_Normal; - if (ex == 0x7fff) - /* Infinity or NaN */ - i = bits[0] | bits[1] ? STRTOG_NaN : STRTOG_Infinite; - } - else if (bits[0] | bits[1]) { - i = STRTOG_Denormal; - ex = 1; - } - else - i = STRTOG_Zero; - b->kind = i; - b->ex = ex - (0x3fff + 63); - } - -#undef _0 -#undef _1 -#undef _2 -#undef _3 -#undef _4 -#define GDTOA_LD_fpbits xfpbits -#endif /*} Use_GDTOA_for_i386_long_double */ - -#ifdef Use_GDTOA_Qtype /*{*/ -#include "third_party/gdtoa/gdtoa.h" -#ifndef GDTOA_Qtype -#define GDTOA_Qtype long double -#endif -#ifdef GDTOA_LD_fpbits -#define GDTOA_Q_fpbits Qfpbits -#else -#define GDTOA_LD_fpbits Qfpbits -#endif - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#define _3 3 -#endif -#ifdef IEEE_8087 -#define _0 3 -#define _1 2 -#define _2 1 -#define _3 0 -#endif - - static void -Qfpbits(U *u, FPBits *b) -{ - ULong *bits; - int ex, i; - static const FPI fpi0 = { 113, 1-16383-113+1, 32766 - 16383 - 113 + 1, 1, 0, 0 /*not used*/ }; - - b->fpi = &fpi0; - b->gdtoa = gdtoa; - b->sign = u->ui[_0] & 0x80000000L; - bits = b->bits; - bits[3] = u->ui[_0] & 0xffff; - bits[2] = u->ui[_1]; - bits[1] = u->ui[_2]; - bits[0] = u->ui[_3]; - if ( (ex = (u->ui[_0] & 0x7fff0000L) >> 16) !=0) { - if (ex == 0x7fff) { - /* Infinity or NaN */ - i = bits[0] | bits[1] | bits[2] | bits[3] - ? 
STRTOG_NaN : STRTOG_Infinite; - } - else { - i = STRTOG_Normal; - bits[3] |= 0x10000; - } - } - else if (bits[0] | bits[1] | bits[2] | bits[3]) { - i = STRTOG_Denormal; - ex = 1; - } - else - i = STRTOG_Zero; - b->kind = i; - b->ex = ex - (0x3fff + 112); - } - -#undef _0 -#undef _1 -#undef _2 -#undef _3 -#endif /*} GDTOA_Qtype */ - -#ifdef KR_headers -#define Const /* const */ -#define Voidptr char* -#ifndef size_t__ -#define size_t int -#define size_t__ -#endif - -#else - -#define Const const -#define Voidptr void* - -#endif - -#undef MESS -#ifndef Stderr -#define Stderr stderr -#endif - -#ifdef _windows_ -#undef PF_BUF -#define MESS -#include "third_party/gdtoa/mux0.h" -#define stdout_or_err(f) (f == stdout) -#else -#define stdout_or_err(f) (f == Stderr || f == stdout) -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - extern char *dtoa ANSI((double, int, int, int*, int*, char **)); - extern void freedtoa ANSI((char*)); - - - -#ifdef USE_ULDIV -/* This is for avoiding 64-bit divisions on the DEC Alpha, since */ -/* they are not portable among variants of OSF1 (DEC's Unix). 
*/ - -#define ULDIV(a,b) uldiv_ASL(a,(unsigned long)(b)) - -#ifndef LLBITS -#define LLBITS 6 -#endif -#ifndef ULONG -#define ULONG unsigned long -#endif - - static int -klog(ULONG x) -{ - int k, rv = 0; - - if (x > 1L) - for(k = 1 << LLBITS-1;;) { - if (x >= (1L << k)) { - rv |= k; - x >>= k; - } - if (!(k >>= 1)) - break; - } - return rv; - } - - ULONG -uldiv_ASL(ULONG a, ULONG b) -{ - int ka; - ULONG c, k; - static ULONG b0; - static int kb; - - if (a < b) - return 0; - if (b != b0) { - b0 = b; - kb = klog(b); - } - k = 1; - if ((ka = klog(a) - kb) > 0) { - k <<= ka; - b <<= ka; - } - c = 0; - for(;;) { - if (a >= b) { - a -= b; - c |= k; - } - if (!(k >>= 1)) - break; - a <<= 1; - } - return c; - } - -#else -#define ULDIV(a,b) a / b -#endif /* USE_ULDIV */ - -#ifdef PF_BUF -FILE *stderr_ASL = (FILE*)&stderr_ASL; -void (*pfbuf_print_ASL) ANSI((char*)); -char *pfbuf_ASL; -static char *pfbuf_next; -static size_t pfbuf_len; -extern Char *mymalloc_ASL ANSI((size_t)); -extern Char *myralloc_ASL ANSI((void *, size_t)); - -#undef fflush -#ifdef old_fflush_ASL -#define fflush old_fflush_ASL -#endif - - void -fflush_ASL(FILE *f) -{ - if (f == stderr_ASL) { - if (pfbuf_ASL && pfbuf_print_ASL) { - (*pfbuf_print_ASL)(pfbuf_ASL); - free(pfbuf_ASL); - pfbuf_ASL = 0; - } - } - else - fflush(f); - } - - static void -pf_put(char *buf, int len) -{ - size_t x, y; - if (!pfbuf_ASL) { - x = len + 256; - if (x < 512) - x = 512; - pfbuf_ASL = pfbuf_next = (char*)mymalloc_ASL(pfbuf_len = x); - } - else if ((y = (pfbuf_next - pfbuf_ASL) + len) >= pfbuf_len) { - x = pfbuf_len; - while((x <<= 1) <= y); - y = pfbuf_next - pfbuf_ASL; - pfbuf_ASL = (char*)myralloc_ASL(pfbuf_ASL, x); - pfbuf_next = pfbuf_ASL + y; - pfbuf_len = x; - } - memcpy(pfbuf_next, buf, len); - pfbuf_next += len; - *pfbuf_next = 0; - } - - static char * -pfput(Finfo *f, int *rvp) -{ - int n; - char *ob0 = f->ob0; - *rvp += n = (int)(f->obe1 - ob0); - pf_put(ob0, n); - return ob0; - } -#endif /* PF_BUF */ - - static char 
* -Fput -#ifdef KR_headers - (f, rvp) Finfo *f; int *rvp; -#else - (Finfo *f, int *rvp) -#endif -{ - char *ob0 = f->ob0; - - *rvp += f->obe1 - ob0; - *f->obe1 = 0; - fputs(ob0, f->u.cf); - return ob0; - } - - -#ifdef _windows_ -int stdout_fileno_ASL = 1; - - static char * -Wput -#ifdef KR_headers - (f, rvp) Finfo *f; int *rvp; -#else - (Finfo *f, int *rvp) -#endif -{ - char *ob0 = f->ob0; - - *rvp += f->obe1 - ob0; - *f->obe1 = 0; - mwrite(ob0, f->obe1 - ob0); - return ob0; - } -#endif /*_windows_*/ - - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#endif -#ifdef IEEE_8087 -#define _0 1 -#define _1 0 -#endif - - static void -dfpbits(U *u, FPBits *b) -{ - ULong *bits; - int ex, i; - static const FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, 0, 0 /*not used*/ }; - - b->fpi = &fpi0; - b->gdtoa = gdtoa; - b->sign = u->ui[_0] & 0x80000000L; - bits = b->bits; - bits[1] = u->ui[_0] & 0xfffff; - bits[0] = u->ui[_1]; - if ( (ex = (u->ui[_0] & 0x7ff00000L) >> 20) !=0) { - if (ex == 0x7ff) { - /* Infinity or NaN */ - i = bits[0] | bits[1] ? 
STRTOG_NaN : STRTOG_Infinite; - } - else { - i = STRTOG_Normal; - bits[1] |= 0x100000; - } - } - else if (bits[0] | bits[1]) { - i = STRTOG_Denormal; - ex = 1; - } - else - i = STRTOG_Zero; - b->kind = i; - b->ex = ex - (0x3ff + 52); - } - -#undef _0 -#undef _1 - -#ifdef Honor_FLT_ROUNDS /*{{*/ -#ifdef Trust_FLT_ROUNDS /*{{*/ -#define RoundCheck if (Rounding == -1) Rounding = Flt_Rounds; if (Rounding != 1){\ - fpi1 = *fpb.fpi; fpi1.rounding = Rounding; fpb.fpi = &fpi1;} -#else /*}{*/ -#define RoundCheck if (Rounding == -1) { Rounding = 1; switch((fegetround()) {\ - case FE_TOWARDZERO: Rounding = 0; break;\ - case FE_UPWARD: Rounding = 2; break;\ - case FE_DOWNWARD: Rounding = 3; }}\ - if (Rounding != 1){\ - fpi1 = *fpb.fpi; fpi1.rounding = Rounding; fpb.fpi = &fpi1;} -#endif /*}}*/ -#else /*}{*/ -#define RoundCheck /*nothing*/ -#endif /*}}*/ - -#ifndef NO_PRINTF_A_FMT /*{*/ - static int -fpiprec(FPBits *b) /* return number of hex digits minus 1, or 0 for zero */ -{ - FPI *fpi; - ULong *bits; - int i, j, k, m; - - if (b->kind == STRTOG_Zero) - return b->ex = 0; - fpi = b->fpi; - bits = b->bits; - for(k = (fpi->nbits - 1) >> 2; k > 0; --k) - if ((bits[k >> 3] >> 4*(k & 7)) & 0xf) { - m = k >> 3; - for(i = 0; i <= m; ++i) - if (bits[i]) { - if (i > 0) { - k -= 8*i; - b->ex += 32*i; - for(j = i; j <= m; ++j) - bits[j-i] = bits[j]; - } - break; - } - for(i = 0; i < 28 && !((bits[0] >> i) & 0xf); i += 4); - if (i) { - b->ex += i; - m = k >> 3; - k -= (i >> 2); - for(j = 0;;++j) { - bits[j] >>= i; - if (j == m) - break; - bits[j] |= bits[j+1] << (32 - i); - } - } - break; - } - return k; - } - - static int -bround(FPBits *b, int prec, int prec1) /* round to prec hex digits after the "." 
*/ -{ /* prec1 = incoming precision (after ".") */ - ULong *bits, t; - int i, inc, j, k, m, n; -#ifdef Honor_FLT_ROUNDS - int rounding = fpi->rounding; - - if (rounding > FPI_Round_near && b->sign) - rounding = FPI_Round_up + FPI_Round_down - rounding; - if (rounding == FPI_Round_down) - rounding = FPI_Round_zero; -#endif - m = prec1 - prec; - bits = b->bits; - inc = 0; -#ifdef Honor_FLT_ROUNDS - switch(rounding) { - case FPI_Round_up: - for(i = 0; i < m; i += 8) - if (bits[i>>3]) - goto inc1; - if ((j = i - m) > 0 && bits[(i-8)>>3] << j*4) - goto inc1; - break; - case FPI_Round_near: -#endif - k = m - 1; - if ((t = bits[k >> 3] >> (j = (k&7)*4)) & 8) { - if (t & 7) - goto inc1; - if (j && bits[k >> 3] << (32 - j)) - goto inc1; - while(k >= 8) { - k -= 8; - if (bits[k>>3]) { - inc1: - inc = 1; - goto haveinc; - } - } - } -#ifdef Honor_FLT_ROUNDS - } -#endif - haveinc: - b->ex += m*4; - i = m >> 3; - k = prec1 >> 3; - j = i; - if ((n = 4*(m & 7))) - for(;; ++j) { - bits[j-i] = bits[j] >> n; - if (j == k) - break; - bits[j-i] |= bits[j+1] << (32-n); - } - else - for(;; ++j) { - bits[j-i] = bits[j]; - if (j == k) - break; - } - k = prec >> 3; - if (inc) { - for(j = 0; !(++bits[j] & 0xffffffff); ++j); - if (j > k) { - onebit: - bits[0] = 1; - b->ex += 4*prec; - return 1; - } - if ((j = prec & 7) < 7 && bits[k] >> (j+1)*4) - goto onebit; - } - for(i = 0; !(bits[i >> 3] & (0xf << 4*(i&7))); ++i); - if (i) { - b->ex += 4*i; - prec -= i; - j = i >> 3; - i &= 7; - i *= 4; - for(m = j; ; ++m) { - bits[m-j] = bits[m] >> i; - if (m == k) - break; - bits[m-j] |= bits[m+1] << (32 - i); - } - } - return prec; - } -#endif /*}NO_PRINTF_A_FMT*/ - -#define put(x) { *outbuf++ = x; if (outbuf == obe) outbuf = (*fput)(f,&rv); } - - static int -x_sprintf -#ifdef KR_headers - (obe, fput, f, fmt, ap) - char *obe, *fmt; Finfo *f; Putfunc fput; va_list ap; -#else - (char *obe, Putfunc fput, Finfo *f, const char *fmt, va_list ap) -#endif -{ - FPBits fpb; - Fpbits fpbits; - U u; - char 
*digits, *ob0, *outbuf, *s, *s0, *se; - Const char *fmt0; - char buf[32]; - long i; - unsigned long j, ul; - double x; - int alt, base, c, decpt, dot, conv, i1, k, lead0, left, - len, prec, prec1, psign, rv, sign, width; - long Ltmp, *ip; - short sh; - unsigned short us; - unsigned int ui; -#ifdef Honor_FLT_ROUNDS - FPI fpi1; - int Rounding = -1; -#endif -#ifndef NO_PRINTF_A_FMT /*{*/ - int bex, bw; -#endif /*} NO_PRINTF_A_FMT */ - static char hex[] = "0123456789abcdefpx"; - static char Hex[] = "0123456789ABCDEFPX"; - - ob0 = outbuf = f->ob0; - rv = 0; - for(;;) { - for(;;) { - switch(c = *fmt++) { - case 0: - goto done; - case '%': - break; - default: - put(c) - continue; - } - break; - } - alt=dot=lead0=left=len=prec=psign=sign=width=0; - fpbits = dfpbits; - fmt0 = fmt; - fmtloop: - switch(conv = *fmt++) { - case ' ': - case '+': - sign = conv; - goto fmtloop; - case '-': - if (dot) - psign = 1; - else - left = 1; - goto fmtloop; - case '#': - alt = 1; - goto fmtloop; - case '0': - if (!lead0 && !dot) { - lead0 = 1; - goto fmtloop; - } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - k = conv - '0'; - while((c = *fmt) >= '0' && c <= '9') { - k = 10*k + c - '0'; - fmt++; - } - if (dot) - prec = psign ? 
-k : k; - else - width = k; - goto fmtloop; - case 'h': - len = 2; - goto fmtloop; - case 'L': -#ifdef GDTOA_LD_fpbits /*{*/ - fpbits = GDTOA_LD_fpbits; -#ifdef GDTOA_Q_fpbits - if (*fmt == 'q') { - ++fmt; - fpbits = Qfpbits; - } -#endif -#endif /*}*/ - goto fmtloop; - case 'l': - len = 1; - goto fmtloop; - case '.': - dot = 1; - goto fmtloop; - case '*': - k = va_arg(ap, int); - if (dot) - prec = k; - else { - if (k < 0) { - sign = '-'; - k = -k; - } - width = k; - } - goto fmtloop; - case 'c': - c = va_arg(ap, int); - put(c) - continue; - case '%': - put(conv) - continue; - case 'u': - switch(len) { - case 0: - ui = va_arg(ap, int); - i = ui; - break; - case 1: - i = va_arg(ap, long); - break; - case 2: - us = va_arg(ap, int); - i = us; - } - sign = 0; - goto have_i; - case 'i': - case 'd': - switch(len) { - case 0: - k = va_arg(ap, int); - i = k; - break; - case 1: - i = va_arg(ap, long); - break; - case 2: - sh = va_arg(ap, int); - i = sh; - } - if (i < 0) { - sign = '-'; - i = -i; - } - have_i: - base = 10; - ul = i; - digits = hex; - baseloop: - if (dot) - lead0 = 0; - s = buf; - if (!ul) - alt = 0; - do { - j = ULDIV(ul, base); - *s++ = digits[ul - base*j]; - } - while((ul = j)); - prec -= c = s - buf; - if (alt && conv == 'o' && prec <= 0) - prec = 1; - if ((width -= c) > 0) { - if (prec > 0) - width -= prec; - if (sign) - width--; - if (alt == 2) - width--; - } - if (left) { - if (alt == 2) - put('0') /* for 0x */ - if (sign) - put(sign) - while(--prec >= 0) - put('0') - do put(*--s) - while(s > buf); - while(--width >= 0) - put(' ') - continue; - } - if (width > 0) { - if (lead0) { - if (alt == 2) - put('0') - if (sign) - put(sign) - while(--width >= 0) - put('0') - goto s_loop; - } - else - while(--width >= 0) - put(' ') - } - if (alt == 2) - put('0') - if (sign) - put(sign) - s_loop: - while(--prec >= 0) - put('0') - do put(*--s) - while(s > buf); - continue; - case 'n': - ip = va_arg(ap, long*); - if (!ip) - ip = &Ltmp; - c = outbuf - ob0 + rv; - 
switch(len) { - case 0: - *(int*)ip = c; - break; - case 1: - *ip = c; - break; - case 2: - *(short*)ip = c; - } - break; - case 'p': - len = alt = 1; - /* no break */ - case 'x': - digits = hex; - goto more_x; - case 'X': - digits = Hex; - more_x: - if (alt) { - alt = 2; - sign = conv; - } - else - sign = 0; - base = 16; - get_u: - switch(len) { - case 0: - ui = va_arg(ap, int); - ul = ui; - break; - case 1: - ul = va_arg(ap, long); - break; - case 2: - us = va_arg(ap, int); - ul = us; - } - if (!ul) - sign = alt = 0; - goto baseloop; - case 'o': - base = 8; - digits = hex; - goto get_u; - case 's': - s0 = 0; - s = va_arg(ap, char*); - if (!s) - s = ""; - if (prec < 0) - prec = 0; - have_s: - if (dot) { - for(c = 0; c < prec; c++) - if (!s[c]) - break; - prec = c; - } - else - prec = strlen(s); - width -= prec; - if (!left) - while(--width >= 0) - put(' ') - while(--prec >= 0) - put(*s++) - while(--width >= 0) - put(' ') - if (s0) - freedtoa(s0); - continue; - case 'f': - if (!dot) - prec = 6; -#ifdef GDTOA_H_INCLUDED - if (fpbits == dfpbits) { -#endif - x = va_arg(ap, double); - s = s0 = dtoa(x, 3, prec, &decpt, &fpb.sign, &se); -#ifdef GDTOA_H_INCLUDED - } - else { -#ifdef GDTOA_both - if (fpbits == GDTOA_LD_fpbits) - u.ld = va_arg(ap, long double); - else - u.Qd = va_arg(ap, GDTOA_Qtype); -#else - u.ld = va_arg(ap, long double); -#endif - fpbits(&u, &fpb); - RoundCheck - s = s0 = fpb.gdtoa(fpb.fpi, fpb.ex, fpb.bits, - &fpb.kind, 3, prec, &decpt, &se); - } -#endif - if (decpt == 9999) { - fmt9999: - dot = prec = alt = 0; - if (*s == 'N') - goto have_s; - decpt = strlen(s); - } - f_fmt: - if (fpb.sign && (x||sign)) - sign = '-'; - if (prec > 0) - width -= prec; - if (width > 0) { - if (sign) - --width; - if (decpt <= 0) { - --width; - if (prec > 0) - --width; - } - else { - if (s == se) - decpt = 1; - width -= decpt; - if (prec > 0 || alt) - --width; - } - } - if (width > 0 && !left) { - if (lead0) { - if (sign) - put(sign) - sign = 0; - do put('0') - 
while(--width > 0); - } - else do put(' ') - while(--width > 0); - } - if (sign) - put(sign) - if (decpt <= 0) { - put('0') - if (prec > 0 || alt) - put('.') - while(decpt < 0) { - put('0') - prec--; - decpt++; - } - } - else { - do { - if ((c = *s)) - s++; - else - c = '0'; - put(c) - } - while(--decpt > 0); - if (prec > 0 || alt) - put('.') - } - while(--prec >= 0) { - if ((c = *s)) - s++; - else - c = '0'; - put(c) - } - while(--width >= 0) - put(' ') - if (s0) - freedtoa(s0); - continue; - case 'G': - case 'g': - if (!dot) - prec = 6; - if (prec < 0) - prec = 0; -#ifdef GDTOA_H_INCLUDED - if (fpbits == dfpbits) { -#endif - x = va_arg(ap, double); - s = s0 = dtoa(x, prec ? 2 : 0, prec, &decpt, - &fpb.sign, &se); -#ifdef GDTOA_H_INCLUDED - } - else { -#ifdef GDTOA_both - if (fpbits == GDTOA_LD_fpbits) - u.ld = va_arg(ap, long double); - else - u.Qd = va_arg(ap, GDTOA_Qtype); -#else - u.ld = va_arg(ap, long double); -#endif - fpbits(&u, &fpb); - RoundCheck - s = s0 = fpb.gdtoa(fpb.fpi, fpb.ex, fpb.bits, - &fpb.kind, prec ? 2 : 0, prec, &decpt, &se); - } -#endif - if (decpt == 9999) - goto fmt9999; - c = se - s; - prec1 = prec; - if (!prec) { - prec = c; - prec1 = c + (s[1] || alt ? 5 : 4); - /* %.0g gives 10 rather than 1e1 */ - } - if (decpt > -4 && decpt <= prec1) { - if (alt) - prec -= decpt; - else - prec = c - decpt; - if (prec < 0) - prec = 0; - goto f_fmt; - } - conv -= 2; - if (!alt && prec > c) - prec = c; - --prec; - goto e_fmt; - case 'e': - case 'E': - if (!dot) - prec = 6; - if (prec < 0) - prec = 0; -#ifdef GDTOA_H_INCLUDED - if (fpbits == dfpbits) { -#endif - x = va_arg(ap, double); - s = s0 = dtoa(x, prec ? 
2 : 0, prec+1, &decpt, - &fpb.sign, &se); -#ifdef GDTOA_H_INCLUDED - } - else { -#ifdef GDTOA_both - if (fpbits == GDTOA_LD_fpbits) - u.ld = va_arg(ap, long double); - else - u.Qd = va_arg(ap, GDTOA_Qtype); -#else - u.ld = va_arg(ap, long double); -#endif - fpbits(&u, &fpb); - RoundCheck - s = s0 = fpb.gdtoa(fpb.fpi, fpb.ex, fpb.bits, - &fpb.kind, prec ? 2 : 0, prec, &decpt, &se); - } -#endif - if (decpt == 9999) - goto fmt9999; - e_fmt: - if (fpb.sign && (x||sign)) - sign = '-'; - if ((width -= prec + 5) > 0) { - if (sign) - --width; - if (prec || alt) - --width; - } - if ((c = --decpt) < 0) - c = -c; - while(c >= 100) { - --width; - c /= 10; - } - if (width > 0 && !left) { - if (lead0) { - if (sign) - put(sign) - sign = 0; - do put('0') - while(--width > 0); - } - else do put(' ') - while(--width > 0); - } - if (sign) - put(sign) - put(*s++) - if (prec || alt) - put('.') - while(--prec >= 0) { - if ((c = *s)) - s++; - else - c = '0'; - put(c) - } - put(conv) - if (decpt < 0) { - put('-') - decpt = -decpt; - } - else - put('+') - for(c = 2, k = 10; 10*k <= decpt; c++, k *= 10); - for(;;) { - i1 = decpt / k; - put(i1 + '0') - if (--c <= 0) - break; - decpt -= i1*k; - decpt *= 10; - } - while(--width >= 0) - put(' ') - freedtoa(s0); - continue; -#ifndef NO_PRINTF_A_FMT - case 'a': - digits = hex; - goto more_a; - case 'A': - digits = Hex; - more_a: -#ifdef GDTOA_H_INCLUDED /*{{*/ - if (fpbits == dfpbits) - u.d = va_arg(ap, double); -#ifdef GDTOA_both /*{*/ - else if (fpbits == GDTOA_LD_fpbits) - u.ld = va_arg(ap, long double); - else - u.Qd = va_arg(ap, GDTOA_Qtype); -#else - else - u.ld = va_arg(ap, long double); -#endif /*}*/ -#else /*}{*/ - u.d = va_arg(ap, double); -#endif /*}}*/ - fpbits(&u, &fpb); - if (fpb.kind == STRTOG_Infinite) { - s = "Infinity"; - s0 = 0; - goto fmt9999; - } - if (fpb.kind == STRTOG_NaN) { - s = "NaN"; - s0 = 0; - goto fmt9999; - } - prec1 = fpiprec(&fpb); - if (dot && prec < prec1) - prec1 = bround(&fpb, prec, prec1); - bw = 1; - bex = 
fpb.ex + 4*prec1; - if (bex) { - if ((i1 = bex) < 0) - i1 = -i1; - while(i1 >= 10) { - ++bw; - i1 /= 10; - } - } - if (fpb.sign && (sign || fpb.kind != STRTOG_Zero)) - sign = '-'; - if ((width -= bw + 5) > 0) { - if (sign) - --width; - if (prec1 || alt) - --width; - } - if ((width -= prec1) > 0 && !left && !lead0) { - do put(' ') - while(--width > 0); - } - if (sign) - put(sign) - put('0') - put(digits[17]) - if (lead0 && width > 0 && !left) { - do put('0') - while(--width > 0); - } - i1 = prec1 & 7; - k = prec1 >> 3; - put(digits[(fpb.bits[k] >> 4*i1) & 0xf]) - if (prec1 > 0 || alt) - put('.') - if (prec1 > 0) { - prec -= prec1; - while(prec1 > 0) { - if (--i1 < 0) { - if (--k < 0) - break; - i1 = 7; - } - put(digits[(fpb.bits[k] >> 4*i1) & 0xf]) - --prec1; - } - if (alt && prec > 0) - do put(0) - while(--prec > 0); - } - put(digits[16]) - if (bex < 0) { - put('-') - bex = -bex; - } - else - put('+') - for(c = 1; 10*c <= bex; c *= 10); - for(;;) { - i1 = bex / c; - put('0' + i1) - if (!--bw) - break; - bex -= i1 * c; - bex *= 10; - } - while(--width >= 0) - put(' ') - continue; -#endif /* NO_PRINTF_A_FMT */ - default: - put('%') - while(fmt0 < fmt) - put(*fmt0++) - continue; - } - } - done: - *outbuf = 0; - return (f->lastlen = outbuf - ob0) + rv; - } - -#define Bsize 256 - - int -Printf -#ifdef KR_headers - (va_alist) - va_dcl -{ - char *fmt; - - va_list ap; - int rv; - Finfo f; - char buf[Bsize]; - - va_start(ap); - fmt = va_arg(ap, char*); - /*}*/ -#else - (const char *fmt, ...) 
-{ - va_list ap; - int rv; - Finfo f; - char buf[Bsize]; - - va_start(ap, fmt); -#endif - f.u.cf = stdout; - f.ob0 = buf; - f.obe1 = buf + Bsize - 1; -#ifdef _windows_ - if (fileno(stdout) == stdout_fileno_ASL) { - rv = x_sprintf(f.obe1, Wput, &f, fmt, ap); - mwrite(buf, f.lastlen); - } - else -#endif -#ifdef PF_BUF - if (stdout == stderr_ASL) { - rv = x_sprintf(f.obe1, pfput, &f, fmt, ap); - pf_put(buf, f.lastlen); - } - else -#endif - { - rv = x_sprintf(f.obe1, Fput, &f, fmt, ap); - fputs(buf, stdout); - } - va_end(ap); - return rv; - } - - static char * -Sput -#ifdef KR_headers - (f, rvp) Finfo *f; int *rvp; -#else - (Finfo *f, int *rvp) -#endif -{ - if (Printf("\nBUG! Sput called!\n", f, rvp)) - /* pass vp, rvp and return 0 to shut diagnostics off */ - exit(250); - return 0; - } - - int -Sprintf -#ifdef KR_headers - (va_alist) - va_dcl -{ - char *s, *fmt; - va_list ap; - int rv; - Finfo f; - - va_start(ap); - s = va_arg(ap, char*); - fmt = va_arg(ap, char*); - /*}*/ -#else - (char *s, const char *fmt, ...) -{ - va_list ap; - int rv; - Finfo f; - - va_start(ap, fmt); -#endif - f.ob0 = s; - rv = x_sprintf(s, Sput, &f, fmt, ap); - va_end(ap); - return rv; - } - - int -Fprintf -#ifdef KR_headers - (va_alist) - va_dcl -{ - FILE *F; - char *s, *fmt; - va_list ap; - int rv; - Finfo f; - char buf[Bsize]; - - va_start(ap); - F = va_arg(ap, FILE*); - fmt = va_arg(ap, char*); - /*}*/ -#else - (FILE *F, const char *fmt, ...) 
-{ - va_list ap; - int rv; - Finfo f; - char buf[Bsize]; - - va_start(ap, fmt); -#endif - f.u.cf = F; - f.ob0 = buf; - f.obe1 = buf + Bsize - 1; -#ifdef MESS - if (stdout_or_err(F)) { -#ifdef _windows_ - if (fileno(stdout) == stdout_fileno_ASL) { - rv = x_sprintf(f.obe1, Wput, &f, fmt, ap); - mwrite(buf, f.lastlen); - } - else -#endif -#ifdef PF_BUF - if (F == stderr_ASL) { - rv = x_sprintf(f.obe1, pfput, &f, fmt, ap); - pf_put(buf, f.lastlen); - } - else -#endif - { - rv = x_sprintf(f.obe1, Fput, &f, fmt, ap); - fputs(buf, F); - } - } - else -#endif /*MESS*/ - { -#ifdef PF_BUF - if (F == stderr_ASL) { - rv = x_sprintf(f.obe1, pfput, &f, fmt, ap); - pf_put(buf, f.lastlen); - } - else -#endif - { - rv = x_sprintf(f.obe1, Fput, &f, fmt, ap); - fputs(buf, F); - } - } - va_end(ap); - return rv; - } - - int -Vsprintf -#ifdef KR_headers - (s, fmt, ap) char *s, *fmt; va_list ap; -#else - (char *s, const char *fmt, va_list ap) -#endif -{ - Finfo f; - return x_sprintf(f.ob0 = s, Sput, &f, fmt, ap); - } - - int -Vfprintf -#ifdef KR_headers - (F, fmt, ap) FILE *F; char *fmt; va_list ap; -#else - (FILE *F, const char *fmt, va_list ap) -#endif -{ - char buf[Bsize]; - int rv; - Finfo f; - - f.u.cf = F; - f.ob0 = buf; - f.obe1 = buf + Bsize - 1; -#ifdef MESS - if (stdout_or_err(F)) { -#ifdef _windows_ - if (fileno(stdout) == stdout_fileno_ASL) { - rv = x_sprintf(f.obe1, Wput, &f, fmt, ap); - mwrite(buf, f.lastlen); - } - else -#endif -#ifdef PF_BUF - if (F == stderr_ASL) { - rv = x_sprintf(f.obe1, pfput, &f, fmt, ap); - pf_put(buf, f.lastlen); - } - else -#endif - { - rv = x_sprintf(f.obe1, Fput, &f, fmt, ap); - fputs(buf, F); - } - } - else -#endif /*MESS*/ - { -#ifdef PF_BUF - if (F == stderr_ASL) { - rv = x_sprintf(f.obe1, pfput, &f, fmt, ap); - pf_put(buf, f.lastlen); - } - else -#endif - { - rv = x_sprintf(f.obe1, Fput, &f, fmt, ap); - fputs(buf, F); - } - } - va_end(ap); - return rv; - } - - void -Perror -#ifdef KR_headers - (s) char *s; -#else - (const char *s) -#endif -{ 
- if (s && *s) - Fprintf(Stderr, "%s: ", s); - Fprintf(Stderr, "%s\n", strerror(errno)); - } - - static char * -Snput -#ifdef KR_headers - (f, rvp) Finfo *f; int *rvp; -#else - (Finfo *f, int *rvp) -#endif -{ - char *s, *s0; - size_t L; - - *rvp += Bsize; - s0 = f->ob0; - s = f->u.sf; - if ((L = f->obe1 - s) > Bsize) { - L = Bsize; - goto copy; - } - if (L > 0) { - copy: - memcpy(s, s0, L); - f->u.sf = s + L; - } - return s0; - } - - int -Vsnprintf -#ifdef KR_headers - (s, n, fmt, ap) char *s; size_t n; char *fmt; va_list ap; -#else - (char *s, size_t n, const char *fmt, va_list ap) -#endif -{ - Finfo f; - char buf[Bsize]; - int rv; - size_t L; - - if (n <= 0 || !s) { - n = 1; - s = buf; - } - f.u.sf = s; - f.ob0 = buf; - f.obe1 = s + n - 1; - rv = x_sprintf(buf + Bsize, Snput, &f, fmt, ap); - if (f.lastlen > (L = f.obe1 - f.u.sf)) - f.lastlen = L; - if (f.lastlen > 0) { - memcpy(f.u.sf, buf, f.lastlen); - f.u.sf += f.lastlen; - } - *f.u.sf = 0; - return rv; - } - int -Snprintf -#ifdef KR_headers - (va_alist) - va_dcl -{ - char *s, *fmt; - int rv; - size_t n; - va_list ap; - - va_start(ap); - s = va_arg(ap, char*); - n = va_arg(ap, size_t); - fmt = va_arg(ap, char*); - /*}*/ -#else - (char *s, size_t n, const char *fmt, ...) -{ - int rv; - va_list ap; - - va_start(ap, fmt); -#endif - rv = Vsnprintf(s, n, fmt, ap); - va_end(ap); - return rv; - } - - -#ifdef __cplusplus -} -#endif diff --git a/third_party/gdtoa/smisc.c b/third_party/gdtoa/smisc.c index 34879508..49a6bed7 100644 --- a/third_party/gdtoa/smisc.c +++ b/third_party/gdtoa/smisc.c @@ -32,13 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - Bigint * -s2b -#ifdef KR_headers - (s, nd0, nd, y9, dplen MTa) CONST char *s; int dplen, nd0, nd; ULong y9; MTk -#else - (CONST char *s, int nd0, int nd, ULong y9, int dplen MTd) -#endif +Bigint * +s2b(CONST char *s, int nd0, int nd, ULong y9, int dplen MTd) { Bigint *b; int i, k; @@ -70,13 +65,8 @@ s2b return b; } - double -ratio -#ifdef KR_headers - (a, b) Bigint *a, *b; -#else - (Bigint *a, Bigint *b) -#endif +double +ratio(Bigint *a, Bigint *b) { U da, db; int k, ka, kb; @@ -109,13 +99,8 @@ ratio #ifdef INFNAN_CHECK - int -match -#ifdef KR_headers - (sp, t) char **sp, *t; -#else - (CONST char **sp, char *t) -#endif +int +match(CONST char **sp, char *t) { int c, d; CONST char *s = *sp; @@ -131,12 +116,8 @@ match } #endif /* INFNAN_CHECK */ - void -#ifdef KR_headers -copybits(c, n, b) ULong *c; int n; Bigint *b; -#else +void copybits(ULong *c, int n, Bigint *b) -#endif { ULong *ce, *x, *xe; #ifdef Pack_16 @@ -161,12 +142,8 @@ copybits(ULong *c, int n, Bigint *b) *c++ = 0; } - ULong -#ifdef KR_headers -any_on(b, k) Bigint *b; int k; -#else +ULong any_on(Bigint *b, int k) -#endif { int n, nwds; ULong *x, *x0, x1, x2; diff --git a/third_party/gdtoa/stdio1.h.txt b/third_party/gdtoa/stdio1.h.txt deleted file mode 100644 index b1103151..00000000 --- a/third_party/gdtoa/stdio1.h.txt +++ /dev/null @@ -1,106 +0,0 @@ -#include "libc/stdio/stdio.h" - -/**************************************************************** -Copyright (C) 1997-1999 Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written 
prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -****************************************************************/ - -/* stdio1.h -- for using Printf, Fprintf, Sprintf while - * retaining the system-supplied printf, fprintf, sprintf. - */ - -#ifndef STDIO1_H_included -#define STDIO1_H_included -#ifndef STDIO_H_included /* allow suppressing stdio.h */ -#endif /* e.g., by cplex.h */ - -#ifdef KR_headers -#ifndef _SIZE_T -#define _SIZE_T -typedef unsigned int size_t; -#endif -#define ANSI(x) () -#ifndef Char -#define Char char -#endif -#else -#define ANSI(x) x -#ifndef Char -#define Char void -#endif -#endif - -#ifndef NO_STDIO1 -#ifdef _WIN32 -/* Avoid Microsoft bug that perrror may appear in stdlib.h. */ -/* It should only be declared in stdio.h. 
*/ -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -extern int Fprintf ANSI((FILE *, const char *, ...)); -extern int Printf ANSI((const char *, ...)); -extern int Sprintf ANSI((char *, const char *, ...)); -extern int Snprintf ANSI((char *, size_t, const char *, ...)); -extern void Perror ANSI((const char *)); -extern int Vfprintf ANSI((FILE *, const char *, va_list)); -extern int Vsprintf ANSI((char *, const char *, va_list)); -extern int Vsnprintf ANSI((char *, size_t, const char *, va_list)); - -#ifdef PF_BUF -extern FILE *stderr_ASL; -extern void(*pfbuf_print_ASL) ANSI((char *)); -extern char *pfbuf_ASL; -extern void fflush_ASL ANSI((FILE *)); -#ifdef fflush -#define old_fflush_ASL fflush -#undef fflush -#endif -#define fflush fflush_ASL -#endif - -#ifdef __cplusplus -} -#endif - -#undef printf -#undef fprintf -#undef sprintf -#undef perror -#undef vfprintf -#undef vsprintf -#define printf Printf -#define fprintf Fprintf -#undef snprintf /* for MacOSX */ -#undef vsnprintf /* for MacOSX */ -#define snprintf Snprintf -#define sprintf Sprintf -#define perror Perror -#define vfprintf Vfprintf -#define vsnprintf Vsnprintf -#define vsprintf Vsprintf - -#endif /* NO_STDIO1 */ - -#endif /* STDIO1_H_included */ diff --git a/third_party/gdtoa/strtoIQ.c b/third_party/gdtoa/strtoIQ.c deleted file mode 100644 index af0c66cb..00000000 --- a/third_party/gdtoa/strtoIQ.c +++ /dev/null @@ -1,67 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. 
- -Copyright (C) 1998 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - - int -#ifdef KR_headers -strtoIQ(s, sp, a, b) CONST char *s; char **sp; void *a; void *b; -#else -strtoIQ(CONST char *s, char **sp, void *a, void *b) -#endif -{ - static const FPI fpi = { 113, 1-16383-113+1, 32766-16383-113+1, 1, SI, 0 /*unused*/ }; - Long exp[2]; - Bigint *B[2]; - int k, rv[2]; - ULong *L = (ULong *)a, *M = (ULong *)b; -#ifdef MULTIPLE_THREADS - ThInfo *TI = 0; -#endif - - B[0] = Balloc(2 MTb); - B[0]->wds = 4; - k = strtoIg(s, sp, &fpi, exp, B, rv); - ULtoQ(L, B[0]->x, exp[0], rv[0]); - Bfree(B[0] MTb); - if (B[1]) { - ULtoQ(M, B[1]->x, exp[1], rv[1]); - Bfree(B[1] MTb); - } - else { - M[0] = L[0]; - M[1] = L[1]; - M[2] = L[2]; - M[3] = L[3]; - } - return k; - } diff --git a/third_party/gdtoa/strtoId.c b/third_party/gdtoa/strtoId.c index c1f5ec1c..d9b9ff69 100644 --- a/third_party/gdtoa/strtoId.c +++ b/third_party/gdtoa/strtoId.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - int -#ifdef KR_headers -strtoId(s, sp, f0, f1) CONST char *s; char **sp; double *f0, *f1; -#else +int strtoId(CONST char *s, char **sp, double *f0, double *f1) -#endif { static const FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI, 0 /*unused*/ }; Long exp[2]; diff --git a/third_party/gdtoa/strtoIdd.c b/third_party/gdtoa/strtoIdd.c index 6b95f9bc..652a8071 100644 --- a/third_party/gdtoa/strtoIdd.c +++ b/third_party/gdtoa/strtoIdd.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - int -#ifdef KR_headers -strtoIdd(s, sp, f0, f1) CONST char *s; char **sp; double *f0, *f1; -#else +int strtoIdd(CONST char *s, char **sp, double *f0, double *f1) -#endif { #ifdef Sudden_Underflow static const FPI fpi = { 106, 1-1023, 2046-1023-106+1, 1, 1, 0 /*unused*/ }; diff --git a/third_party/gdtoa/strtoIf.c b/third_party/gdtoa/strtoIf.c index 9c596c32..59b0b094 100644 --- a/third_party/gdtoa/strtoIf.c +++ b/third_party/gdtoa/strtoIf.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - int -#ifdef KR_headers -strtoIf(s, sp, f0, f1) CONST char *s; char **sp; float *f0, *f1; -#else +int strtoIf(CONST char *s, char **sp, float *f0, float *f1) -#endif { static const FPI fpi = { 24, 1-127-24+1, 254-127-24+1, 1, SI, 0 /*unused*/ }; Long exp[2]; diff --git a/third_party/gdtoa/strtoIg.c b/third_party/gdtoa/strtoIg.c index 4512db73..7acf713c 100644 --- a/third_party/gdtoa/strtoIg.c +++ b/third_party/gdtoa/strtoIg.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - int -#ifdef KR_headers -strtoIg(s00, se, fpi, exp, B, rvp) CONST char *s00; char **se; CONST FPI *fpi; Long *exp; Bigint **B; int *rvp; -#else +int strtoIg(CONST char *s00, char **se, CONST FPI *fpi, Long *exp, Bigint **B, int *rvp) -#endif { Bigint *b, *b1; int i, nb, nw, nw1, rv, rv1, swap; diff --git a/third_party/gdtoa/strtoIx.c b/third_party/gdtoa/strtoIx.c index c0cf63b4..de588353 100644 --- a/third_party/gdtoa/strtoIx.c +++ b/third_party/gdtoa/strtoIx.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - int -#ifdef KR_headers -strtoIx(s, sp, a, b) CONST char *s; char **sp; void *a; void *b; -#else +int strtoIx(CONST char *s, char **sp, void *a, void *b) -#endif { static const FPI fpi = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI, 0 /*unused*/ }; Long exp[2]; diff --git a/third_party/gdtoa/strtoIxL.c b/third_party/gdtoa/strtoIxL.c deleted file mode 100644 index 6a574a5d..00000000 --- a/third_party/gdtoa/strtoIxL.c +++ /dev/null @@ -1,66 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. - -Copyright (C) 1998 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - - int -#ifdef KR_headers -strtoIxL(s, sp, a, b) CONST char *s; char **sp; void *a; void *b; -#else -strtoIxL(CONST char *s, char **sp, void *a, void *b) -#endif -{ - static const FPI fpi = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI, 0 /*unused*/ }; - Long exp[2]; - Bigint *B[2]; - int k, rv[2]; - ULong *L = (ULong *)a, *M = (ULong *)b; -#ifdef MULTIPLE_THREADS - ThInfo *TI = 0; -#endif - - B[0] = Balloc(1 MTb); - B[0]->wds = 2; - k = strtoIg(s, sp, &fpi, exp, B, rv); - ULtoxL(L, B[0]->x, exp[0], rv[0]); - Bfree(B[0] MTb); - if (B[1]) { - ULtoxL(M, B[1]->x, exp[1], rv[1]); - Bfree(B[1] MTb); - } - else { - M[0] = L[0]; - M[1] = L[1]; - M[2] = L[2]; - } - return k; - } diff --git a/third_party/gdtoa/strtod.c b/third_party/gdtoa/strtod.c index 35fae5ae..1080b6b3 100644 --- a/third_party/gdtoa/strtod.c +++ b/third_party/gdtoa/strtod.c @@ -50,13 +50,8 @@ static CONST double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, #endif #ifdef Avoid_Underflow /*{*/ - static double -sulp -#ifdef KR_headers - (x, scale) U *x; int scale; -#else - (U *x, int scale) -#endif +static double +sulp(U *x, int scale) { U u; double rv; @@ -71,13 +66,8 @@ sulp } #endif /*}*/ - double -strtod -#ifdef KR_headers - (s00, se) CONST char *s00; char **se; -#else - (CONST char *s00, char **se) -#endif +double +strtod(CONST char *s00, char **se) { #ifdef Avoid_Underflow int scale; diff --git a/third_party/gdtoa/strtodI.c b/third_party/gdtoa/strtodI.c index 6c2c575a..dc4d718f 100644 --- a/third_party/gdtoa/strtodI.c +++ b/third_party/gdtoa/strtodI.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - static double -#ifdef KR_headers -ulpdown(d) U *d; -#else +static double ulpdown(U *d) -#endif { double u; ULong *L = d->L; @@ -49,12 +45,8 @@ ulpdown(U *d) return u; } - int -#ifdef KR_headers -strtodI(s, sp, dd) CONST char *s; char **sp; double *dd; -#else +int strtodI(CONST char *s, char **sp, double *dd) -#endif { static const FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI, 0 /*unused*/ }; ULong bits[2], sign; diff --git a/third_party/gdtoa/strtodg.c b/third_party/gdtoa/strtodg.c index bf34aca8..8a22801f 100644 --- a/third_party/gdtoa/strtodg.c +++ b/third_party/gdtoa/strtodg.c @@ -42,12 +42,8 @@ fivesbits[] = { 0, 3, 5, 7, 10, 12, 14, 17, 19, 21, #endif }; - Bigint * -#ifdef KR_headers -increment(b MTa) Bigint *b; MTk -#else +Bigint * increment(Bigint *b MTd) -#endif { ULong *x, *xe; Bigint *b1; @@ -87,12 +83,8 @@ increment(Bigint *b MTd) return b; } - void -#ifdef KR_headers -decrement(b) Bigint *b; -#else +void decrement(Bigint *b) -#endif { ULong *x, *xe; #ifdef Pack_16 @@ -119,12 +111,8 @@ decrement(Bigint *b) #endif } - static int -#ifdef KR_headers -all_on(b, n) Bigint *b; int n; -#else +static int all_on(Bigint *b, int n) -#endif { ULong *x, *xe; @@ -138,12 +126,8 @@ all_on(Bigint *b, int n) return 1; } - Bigint * -#ifdef KR_headers -set_ones(b, n MTa) Bigint *b; int n; MTk -#else +Bigint * set_ones(Bigint *b, int n MTd) -#endif { int k; ULong *x, *xe; @@ -166,14 +150,8 @@ set_ones(Bigint *b, int n MTd) return b; } - static int -rvOK -#ifdef KR_headers - (d, fpi, exp, bits, exact, rd, irv MTa) - U *d; CONST FPI *fpi; Long *exp; ULong *bits; int exact, rd, *irv; MTk -#else - (U *d, CONST FPI *fpi, Long *exp, ULong *bits, int exact, int rd, int *irv MTd) -#endif +static int +rvOK(U *d, CONST FPI *fpi, Long *exp, ULong *bits, int exact, int rd, int *irv MTd) { Bigint *b; ULong carry, inex, lostbits; @@ -287,12 +265,8 @@ rvOK return rv; } - static int -#ifdef KR_headers -mantbits(d) U *d; -#else +static int mantbits(U *d) -#endif { ULong L; #ifdef 
VAX @@ -310,14 +284,8 @@ mantbits(U *d) return P - 32 - lo0bits(&L); } - int -strtodg -#ifdef KR_headers - (s00, se, fpi, exp, bits) - CONST char *s00; char **se; CONST FPI *fpi; Long *exp; ULong *bits; -#else - (CONST char *s00, char **se, CONST FPI *fpi, Long *exp, ULong *bits) -#endif +int +strtodg(CONST char *s00, char **se, CONST FPI *fpi, Long *exp, ULong *bits) { int abe, abits, asub; int bb0, bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, decpt, denorm; diff --git a/third_party/gdtoa/strtodnrp.c b/third_party/gdtoa/strtodnrp.c index 5211adf8..2b020e16 100644 --- a/third_party/gdtoa/strtodnrp.c +++ b/third_party/gdtoa/strtodnrp.c @@ -38,12 +38,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - double -#ifdef KR_headers -strtod(s, sp) CONST char *s; char **sp; -#else +double strtod(CONST char *s, char **sp) -#endif { static const FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI, 0 /*unused*/ }; ULong bits[2]; diff --git a/third_party/gdtoa/strtof.c b/third_party/gdtoa/strtof.c index c7dd4950..4c95df4d 100644 --- a/third_party/gdtoa/strtof.c +++ b/third_party/gdtoa/strtof.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - float -#ifdef KR_headers -strtof(s, sp) CONST char *s; char **sp; -#else +float strtof(CONST char *s, char **sp) -#endif { static FPI fpi0 = { 24, 1-127-24+1, 254-127-24+1, 1, SI, 0 /*unused*/ }; ULong bits[1]; diff --git a/third_party/gdtoa/strtopQ.c b/third_party/gdtoa/strtopQ.c deleted file mode 100644 index 6885409e..00000000 --- a/third_party/gdtoa/strtopQ.c +++ /dev/null @@ -1,110 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. 
- -Copyright (C) 1998, 2000 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#define _3 3 -#endif -#ifdef IEEE_8087 -#define _0 3 -#define _1 2 -#define _2 1 -#define _3 0 -#endif - - extern ULong NanDflt_Q_D2A[4]; - - - int -#ifdef KR_headers -strtopQ(s, sp, V) CONST char *s; char **sp; void *V; -#else -strtopQ(CONST char *s, char **sp, void *V) -#endif -{ - static const FPI fpi0 = { 113, 1-16383-113+1, 32766 - 16383 - 113 + 1, 1, SI, 0 /*unused*/ }; - ULong bits[4]; - Long exp; - int k; - ULong *L = (ULong*)V; -#ifdef Honor_FLT_ROUNDS -#include "third_party/gdtoa/gdtoa_fltrnds.inc" -#else -#define fpi &fpi0 -#endif - - k = strtodg(s, sp, fpi, &exp, bits); - switch(k & STRTOG_Retmask) { - case STRTOG_NoNumber: - case STRTOG_Zero: - L[0] = L[1] = L[2] = L[3] = 0; - break; - - case STRTOG_Normal: - case STRTOG_NaNbits: - L[_3] = bits[0]; - L[_2] = bits[1]; - L[_1] = bits[2]; - L[_0] = (bits[3] & ~0x10000) | ((exp + 0x3fff + 112) << 16); - break; - - case STRTOG_Denormal: - L[_3] = bits[0]; - L[_2] = bits[1]; - L[_1] = bits[2]; - L[_0] = bits[3]; - break; - - case STRTOG_Infinite: - L[_0] = 0x7fff0000; - L[_1] = L[_2] = L[_3] = 0; - break; - - case STRTOG_NaN: - L[_0] = NanDflt_Q_D2A[3]; - L[_1] = NanDflt_Q_D2A[2]; - L[_2] = NanDflt_Q_D2A[1]; - L[_3] = NanDflt_Q_D2A[0]; - } - if (k & STRTOG_Neg) - L[_0] |= 0x80000000L; - return k; - } diff --git a/third_party/gdtoa/strtopd.c b/third_party/gdtoa/strtopd.c index f506e4fc..6a4741fa 100644 --- a/third_party/gdtoa/strtopd.c +++ b/third_party/gdtoa/strtopd.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - int -#ifdef KR_headers -strtopd(s, sp, d) char *s; char **sp; double *d; -#else +int strtopd(CONST char *s, char **sp, double *d) -#endif { static const FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI, 0 /*unused*/ }; ULong bits[2]; diff --git a/third_party/gdtoa/strtopdd.c b/third_party/gdtoa/strtopdd.c index 302460a0..6a24a9e0 100644 --- a/third_party/gdtoa/strtopdd.c +++ b/third_party/gdtoa/strtopdd.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - int -#ifdef KR_headers -strtopdd(s, sp, dd) CONST char *s; char **sp; double *dd; -#else +int strtopdd(CONST char *s, char **sp, double *dd) -#endif { #ifdef Sudden_Underflow static const FPI fpi0 = { 106, 1-1023, 2046-1023-106+1, 1, 1, 0 /*unused*/ }; diff --git a/third_party/gdtoa/strtopf.c b/third_party/gdtoa/strtopf.c index d00d73bc..8cf73acf 100644 --- a/third_party/gdtoa/strtopf.c +++ b/third_party/gdtoa/strtopf.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - int -#ifdef KR_headers -strtopf(s, sp, f) CONST char *s; char **sp; float *f; -#else +int strtopf(CONST char *s, char **sp, float *f) -#endif { static const FPI fpi0 = { 24, 1-127-24+1, 254-127-24+1, 1, SI, 0 /*unused*/ }; ULong bits[1], *L; diff --git a/third_party/gdtoa/strtopx.c b/third_party/gdtoa/strtopx.c index c1514c82..f1c59474 100644 --- a/third_party/gdtoa/strtopx.c +++ b/third_party/gdtoa/strtopx.c @@ -32,7 +32,7 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - extern UShort NanDflt_ldus_D2A[5]; + extern UShort __gdtoa_NanDflt_ldus[5]; #undef _0 #undef _1 @@ -54,12 +54,8 @@ THIS SOFTWARE. 
#define _4 0 #endif - int -#ifdef KR_headers -strtopx(s, sp, V) CONST char *s; char **sp; void *V; -#else +int strtopx(CONST char *s, char **sp, void *V) -#endif { const static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI, 0 /*unused*/ }; ULong bits[2]; @@ -100,11 +96,11 @@ strtopx(CONST char *s, char **sp, void *V) break; case STRTOG_NaN: - L[_4] = NanDflt_ldus_D2A[0]; - L[_3] = NanDflt_ldus_D2A[1]; - L[_2] = NanDflt_ldus_D2A[2]; - L[_1] = NanDflt_ldus_D2A[3]; - L[_0] = NanDflt_ldus_D2A[4]; + L[_4] = __gdtoa_NanDflt_ldus[0]; + L[_3] = __gdtoa_NanDflt_ldus[1]; + L[_2] = __gdtoa_NanDflt_ldus[2]; + L[_1] = __gdtoa_NanDflt_ldus[3]; + L[_0] = __gdtoa_NanDflt_ldus[4]; } if (k & STRTOG_Neg) L[_0] |= 0x8000; diff --git a/third_party/gdtoa/strtopxL.c b/third_party/gdtoa/strtopxL.c deleted file mode 100644 index da55bb2e..00000000 --- a/third_party/gdtoa/strtopxL.c +++ /dev/null @@ -1,100 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. - -Copyright (C) 1998, 2000 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
-IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). */ - - extern ULong NanDflt_xL_D2A[3]; - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#endif -#ifdef IEEE_8087 -#define _0 2 -#define _1 1 -#define _2 0 -#endif - - int -#ifdef KR_headers -strtopxL(s, sp, V) CONST char *s; char **sp; void *V; -#else -strtopxL(CONST char *s, char **sp, void *V) -#endif -{ - static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI, 0 /*unused*/ }; - ULong bits[2]; - Long exp; - int k; - ULong *L = (ULong*)V; -#ifdef Honor_FLT_ROUNDS -#include "third_party/gdtoa/gdtoa_fltrnds.inc" -#else -#define fpi &fpi0 -#endif - - k = strtodg(s, sp, fpi, &exp, bits); - switch(k & STRTOG_Retmask) { - case STRTOG_NoNumber: - case STRTOG_Zero: - L[0] = L[1] = L[2] = 0; - break; - - case STRTOG_Normal: - case STRTOG_Denormal: - case STRTOG_NaNbits: - L[_2] = bits[0]; - L[_1] = bits[1]; - L[_0] = (exp + 0x3fff + 63) << 16; - break; - - case STRTOG_Infinite: - L[_0] = 0x7fff << 16; - L[_1] = 0x80000000; - L[_2] = 0; - break; - - case STRTOG_NaN: - L[_0] = NanDflt_xL_D2A[2]; - L[_1] = NanDflt_xL_D2A[1]; - L[_2] = NanDflt_xL_D2A[0]; - } - if (k & STRTOG_Neg) - L[_0] |= 0x80000000L; - return k; - } diff --git a/third_party/gdtoa/strtorQ.c b/third_party/gdtoa/strtorQ.c deleted file mode 100644 index 8e6cb02a..00000000 --- a/third_party/gdtoa/strtorQ.c +++ /dev/null @@ -1,120 +0,0 @@ -#include 
"third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. - -Copyright (C) 1998, 2000 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#define _3 3 -#endif -#ifdef IEEE_8087 -#define _0 3 -#define _1 2 -#define _2 1 -#define _3 0 -#endif - - extern ULong NanDflt_Q_D2A[4]; - - void -#ifdef KR_headers -ULtoQ(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k; -#else -ULtoQ(ULong *L, ULong *bits, Long exp, int k) -#endif -{ - switch(k & STRTOG_Retmask) { - case STRTOG_NoNumber: - case STRTOG_Zero: - L[0] = L[1] = L[2] = L[3] = 0; - break; - - case STRTOG_Normal: - case STRTOG_NaNbits: - L[_3] = bits[0]; - L[_2] = bits[1]; - L[_1] = bits[2]; - L[_0] = (bits[3] & ~0x10000) | ((exp + 0x3fff + 112) << 16); - break; - - case STRTOG_Denormal: - L[_3] = bits[0]; - L[_2] = bits[1]; - L[_1] = bits[2]; - L[_0] = bits[3]; - break; - - case STRTOG_Infinite: - L[_0] = 0x7fff0000; - L[_1] = L[_2] = L[_3] = 0; - break; - - case STRTOG_NaN: - L[_0] = NanDflt_Q_D2A[3]; - L[_1] = NanDflt_Q_D2A[2]; - L[_2] = NanDflt_Q_D2A[1]; - L[_3] = NanDflt_Q_D2A[0]; - } - if (k & STRTOG_Neg) - L[_0] |= 0x80000000L; - } - - int -#ifdef KR_headers -strtorQ(s, sp, rounding, L) CONST char *s; char **sp; int rounding; void *L; -#else -strtorQ(CONST char *s, char **sp, int rounding, void *L) -#endif -{ - static const FPI fpi0 = { 113, 1-16383-113+1, 32766-16383-113+1, 1, SI, 0 /*unused*/ }; - FPI *fpi, fpi1; - ULong bits[4]; - Long exp; - int k; - - fpi = &fpi0; - if (rounding != FPI_Round_near) { - fpi1 = fpi0; - fpi1.rounding = rounding; - fpi = &fpi1; - } - k = strtodg(s, sp, fpi, &exp, bits); - ULtoQ((ULong*)L, bits, exp, k); - return k; - } diff --git a/third_party/gdtoa/strtord.c b/third_party/gdtoa/strtord.c index 61ae0df8..361b44a7 100644 --- a/third_party/gdtoa/strtord.c +++ b/third_party/gdtoa/strtord.c @@ -32,14 +32,10 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - extern ULong NanDflt_d_D2A[2]; + extern ULong __gdtoa_NanDflt_d[2]; - void -#ifdef KR_headers -ULtod(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k; -#else +void ULtod(ULong *L, ULong *bits, Long exp, int k) -#endif { switch(k & STRTOG_Retmask) { case STRTOG_NoNumber: @@ -64,19 +60,15 @@ ULtod(ULong *L, ULong *bits, Long exp, int k) break; case STRTOG_NaN: - L[_0] = NanDflt_d_D2A[1]; - L[_1] = NanDflt_d_D2A[0]; + L[_0] = __gdtoa_NanDflt_d[1]; + L[_1] = __gdtoa_NanDflt_d[0]; } if (k & STRTOG_Neg) L[_0] |= 0x80000000L; } - int -#ifdef KR_headers -strtord(s, sp, rounding, d) CONST char *s; char **sp; int rounding; double *d; -#else +int strtord(CONST char *s, char **sp, int rounding, double *d) -#endif { static const FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI, 0 /*unused*/ }; FPI *fpi, fpi1; diff --git a/third_party/gdtoa/strtordd.c b/third_party/gdtoa/strtordd.c index 490bb1b6..25a27774 100644 --- a/third_party/gdtoa/strtordd.c +++ b/third_party/gdtoa/strtordd.c @@ -32,14 +32,10 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - extern ULong NanDflt_d_D2A[2]; + extern ULong __gdtoa_NanDflt_d[2]; - void -#ifdef KR_headers -ULtodd(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k; -#else +void ULtodd(ULong *L, ULong *bits, Long exp, int k) -#endif { int i, j; @@ -156,8 +152,8 @@ ULtodd(ULong *L, ULong *bits, Long exp, int k) break; case STRTOG_NaN: - L[_0] = L[_0+2] = NanDflt_d_D2A[1]; - L[_1] = L[_1+2] = NanDflt_d_D2A[0]; + L[_0] = L[_0+2] = __gdtoa_NanDflt_d[1]; + L[_1] = L[_1+2] = __gdtoa_NanDflt_d[0]; break; case STRTOG_NaNbits: @@ -174,12 +170,8 @@ ULtodd(ULong *L, ULong *bits, Long exp, int k) } } - int -#ifdef KR_headers -strtordd(s, sp, rounding, dd) CONST char *s; char **sp; int rounding; double *dd; -#else +int strtordd(CONST char *s, char **sp, int rounding, double *dd) -#endif { #ifdef Sudden_Underflow static const FPI fpi0 = { 106, 1-1023, 2046-1023-106+1, 1, 1, 0 /*unused*/ }; diff --git a/third_party/gdtoa/strtorf.c b/third_party/gdtoa/strtorf.c index ddf21102..e971e4c9 100644 --- a/third_party/gdtoa/strtorf.c +++ b/third_party/gdtoa/strtorf.c @@ -32,14 +32,10 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - extern ULong NanDflt_f_D2A[1]; + extern ULong __gdtoa_NanDflt_f[1]; - void -#ifdef KR_headers -ULtof(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k; -#else +void ULtof(ULong *L, ULong *bits, Long exp, int k) -#endif { switch(k & STRTOG_Retmask) { case STRTOG_NoNumber: @@ -61,18 +57,14 @@ ULtof(ULong *L, ULong *bits, Long exp, int k) break; case STRTOG_NaN: - L[0] = NanDflt_f_D2A[0]; + L[0] = __gdtoa_NanDflt_f[0]; } if (k & STRTOG_Neg) L[0] |= 0x80000000L; } - int -#ifdef KR_headers -strtorf(s, sp, rounding, f) CONST char *s; char **sp; int rounding; float *f; -#else +int strtorf(CONST char *s, char **sp, int rounding, float *f) -#endif { static const FPI fpi0 = { 24, 1-127-24+1, 254-127-24+1, 1, SI, 0 /*unused*/ }; FPI *fpi, fpi1; diff --git a/third_party/gdtoa/strtorx.c b/third_party/gdtoa/strtorx.c index 1fe09544..6a5e3835 100644 --- a/third_party/gdtoa/strtorx.c +++ b/third_party/gdtoa/strtorx.c @@ -52,14 +52,10 @@ THIS SOFTWARE. #define _4 0 #endif - extern UShort NanDflt_ldus_D2A[5]; +extern UShort __gdtoa_NanDflt_ldus[5]; - void -#ifdef KR_headers -ULtox(L, bits, exp, k) UShort *L; ULong *bits; Long exp; int k; -#else +void ULtox(UShort *L, ULong *bits, Long exp, int k) -#endif { switch(k & STRTOG_Retmask) { case STRTOG_NoNumber: @@ -88,22 +84,18 @@ ULtox(UShort *L, ULong *bits, Long exp, int k) break; case STRTOG_NaN: - L[_4] = NanDflt_ldus_D2A[0]; - L[_3] = NanDflt_ldus_D2A[1]; - L[_2] = NanDflt_ldus_D2A[2]; - L[_1] = NanDflt_ldus_D2A[3]; - L[_0] = NanDflt_ldus_D2A[4]; + L[_4] = __gdtoa_NanDflt_ldus[0]; + L[_3] = __gdtoa_NanDflt_ldus[1]; + L[_2] = __gdtoa_NanDflt_ldus[2]; + L[_1] = __gdtoa_NanDflt_ldus[3]; + L[_0] = __gdtoa_NanDflt_ldus[4]; } if (k & STRTOG_Neg) L[_0] |= 0x8000; } - int -#ifdef KR_headers -strtorx(s, sp, rounding, L) CONST char *s; char **sp; int rounding; void *L; -#else +int strtorx(CONST char *s, char **sp, int rounding, void *L) -#endif { static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI, 0 
/*unused*/ }; FPI *fpi, fpi1; diff --git a/third_party/gdtoa/strtorxL.c b/third_party/gdtoa/strtorxL.c deleted file mode 100644 index 980dff9b..00000000 --- a/third_party/gdtoa/strtorxL.c +++ /dev/null @@ -1,111 +0,0 @@ -#include "third_party/gdtoa/gdtoa.internal.h" - -/* clang-format off */ -/**************************************************************** - -The author of this software is David M. Gay. - -Copyright (C) 1998, 2000 by Lucent Technologies -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of Lucent or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - -****************************************************************/ - -/* Please send bug reports to David M. Gay (dmg at acm dot org, - * with " at " changed at "@" and " dot " changed to "."). 
*/ - -#undef _0 -#undef _1 - -/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */ - -#ifdef IEEE_MC68k -#define _0 0 -#define _1 1 -#define _2 2 -#endif -#ifdef IEEE_8087 -#define _0 2 -#define _1 1 -#define _2 0 -#endif - - extern ULong NanDflt_xL_D2A[3]; - - void -#ifdef KR_headers -ULtoxL(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k; -#else -ULtoxL(ULong *L, ULong *bits, Long exp, int k) -#endif -{ - switch(k & STRTOG_Retmask) { - case STRTOG_NoNumber: - case STRTOG_Zero: - L[0] = L[1] = L[2] = 0; - break; - - case STRTOG_Normal: - case STRTOG_Denormal: - case STRTOG_NaNbits: - L[_0] = (exp + 0x3fff + 63) << 16; - L[_1] = bits[1]; - L[_2] = bits[0]; - break; - - case STRTOG_Infinite: - L[_0] = 0x7fff0000; - L[_1] = 0x80000000; - L[_2] = 0; - break; - - case STRTOG_NaN: - L[_0] = NanDflt_xL_D2A[2]; - L[_1] = NanDflt_xL_D2A[1]; - L[_2] = NanDflt_xL_D2A[0]; - } - if (k & STRTOG_Neg) - L[_0] |= 0x80000000L; - } - - int -#ifdef KR_headers -strtorxL(s, sp, rounding, L) CONST char *s; char **sp; int rounding; void *L; -#else -strtorxL(CONST char *s, char **sp, int rounding, void *L) -#endif -{ - static const FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI, 0 /*unused*/ }; - FPI *fpi, fpi1; - ULong bits[2]; - Long exp; - int k; - - fpi = &fpi0; - if (rounding != FPI_Round_near) { - fpi1 = fpi0; - fpi1.rounding = rounding; - fpi = &fpi1; - } - k = strtodg(s, sp, fpi, &exp, bits); - ULtoxL((ULong*)L, bits, exp, k); - return k; - } diff --git a/third_party/gdtoa/sum.c b/third_party/gdtoa/sum.c index 59cc6c58..73519fbf 100644 --- a/third_party/gdtoa/sum.c +++ b/third_party/gdtoa/sum.c @@ -32,12 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ - Bigint * -#ifdef KR_headers -sum(a, b MTa) Bigint *a; Bigint *b; MTk -#else +Bigint * sum(Bigint *a, Bigint *b MTd) -#endif { Bigint *c; ULong carry, *xc, *xa, *xb, *xe, y; diff --git a/third_party/gdtoa/ulp.c b/third_party/gdtoa/ulp.c index bf51cb4c..c10f8dc9 100644 --- a/third_party/gdtoa/ulp.c +++ b/third_party/gdtoa/ulp.c @@ -32,13 +32,8 @@ THIS SOFTWARE. /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ - double -ulp -#ifdef KR_headers - (x) U *x; -#else - (U *x) -#endif +double +ulp(U *x) { Long L; U a; diff --git a/third_party/getopt/getopt.c b/third_party/getopt/getopt.c index d5ec6210..6136b48a 100644 --- a/third_party/getopt/getopt.c +++ b/third_party/getopt/getopt.c @@ -3,7 +3,6 @@ getopt (BSD-3)\\n\ Copyright 1987, 1993, 1994 The Regents of the University of California\""); asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ /* $NetBSD: getopt.c,v 1.26 2003/08/07 16:43:40 agc Exp $ */ /* @@ -36,105 +35,122 @@ asm(".include \"libc/disclaimer.inc\""); * * @(#)getopt.c 8.3 (Berkeley) 4/27/95 * $FreeBSD: src/lib/libc/stdlib/getopt.c,v 1.8 2007/01/09 00:28:10 imp Exp $ - * $DragonFly: src/lib/libc/stdlib/getopt.c,v 1.7 2005/11/20 12:37:48 swildner Exp $ + * $DragonFly: src/lib/libc/stdlib/getopt.c,v 1.7 2005/11/20 12:37:48 swildner + *Exp $ */ -#include "libc/str/str.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" +#include "libc/str/str.h" STATIC_YOINK("_init_getopt"); -#define BADCH (int)'?' -#define BADARG (int)':' +#define BADCH (int)'?' +#define BADARG (int)':' -int opterr, /* if error message should be printed */ - optind, /* index into parent argv vector */ - optopt, /* character checked for validity */ - optreset; /* reset getopt */ -char *optarg; /* argument associated with option */ -char *getopt_place; /* option letter processing */ -char kGetoptEmsg[1]; - -/* - * getopt -- - * Parse argc/argv argument vector. 
+/** + * If error message should be printed. */ -int -getopt(int nargc, char * const nargv[], const char *ostr) -{ - char *oli; /* option letter list index */ +int opterr; - /* - * Some programs like cvs expect optind = 0 to trigger - * a reset of getopt. - */ - if (optind == 0) - optind = 1; +/** + * Index into parent argv vector. + */ +int optind; - if (optreset || *getopt_place == 0) { /* update scanning pointer */ - optreset = 0; - getopt_place = nargv[optind]; - if (optind >= nargc || *getopt_place++ != '-') { - /* Argument is absent or is not an option */ - getopt_place = kGetoptEmsg; - return (-1); - } - optopt = *getopt_place++; - if (optopt == '-' && *getopt_place == 0) { - /* "--" => end of options */ - ++optind; - getopt_place = kGetoptEmsg; - return (-1); - } - if (optopt == 0) { - /* Solitary '-', treat as a '-' option - if the program (eg su) is looking for it. */ - getopt_place = kGetoptEmsg; - if (strchr(ostr, '-') == NULL) - return (-1); - optopt = '-'; - } - } else - optopt = *getopt_place++; +/** + * Character checked for validity. + */ +int optopt; - /* See if option letter is one the caller wanted... */ - if (optopt == ':' || (oli = strchr(ostr, optopt)) == NULL) { - if (*getopt_place == 0) - ++optind; - if (opterr && *ostr != ':') - fprintf(stderr, - "%s: illegal option -- %c\n", program_invocation_name, - optopt); - return (BADCH); - } +/** + * Reset getopt. + */ +int optreset; - /* Does this option need an argument? */ - if (oli[1] != ':') { - /* don't need argument */ - optarg = NULL; - if (*getopt_place == 0) - ++optind; - } else { - /* Option-argument is either the rest of this argument or the - entire next argument. 
*/ - if (*getopt_place) - optarg = getopt_place; - else if (nargc > ++optind) - optarg = nargv[optind]; - else { - /* option-argument absent */ - getopt_place = kGetoptEmsg; - if (*ostr == ':') - return (BADARG); - if (opterr) - fprintf(stderr, - "%s: option requires an argument -- %c\n", - program_invocation_name, optopt); - return (BADCH); - } - getopt_place = kGetoptEmsg; - ++optind; - } - return (optopt); /* return option letter */ +/** + * Argument associated with option. + */ +char *optarg; + +/** + * Option letter processing. + */ +char *getopt_place; + +char kGetoptEmsg[1] hidden; + +/** + * Parses argc/argv argument vector. + */ +int getopt(int nargc, char *const nargv[], const char *ostr) { + char *oli; /* option letter list index */ + + /* + * Some programs like cvs expect optind = 0 to trigger + * a reset of getopt. + */ + if (optind == 0) optind = 1; + + if (optreset || *getopt_place == 0) { /* update scanning pointer */ + optreset = 0; + getopt_place = nargv[optind]; + if (optind >= nargc || *getopt_place++ != '-') { + /* Argument is absent or is not an option */ + getopt_place = kGetoptEmsg; + return -1; + } + optopt = *getopt_place++; + if (optopt == '-' && *getopt_place == 0) { + /* "--" => end of options */ + ++optind; + getopt_place = kGetoptEmsg; + return -1; + } + if (optopt == 0) { + /* Solitary '-', treat as a '-' option + if the program (eg su) is looking for it. */ + getopt_place = kGetoptEmsg; + if (strchr(ostr, '-') == NULL) return -1; + optopt = '-'; + } + } else { + optopt = *getopt_place++; + } + + /* See if option letter is one the caller wanted... */ + if (optopt == ':' || (oli = strchr(ostr, optopt)) == NULL) { + if (*getopt_place == 0) ++optind; + if (opterr && *ostr != ':') { + fprintf(stderr, "%s: illegal option -- %c\n", program_invocation_name, + optopt); + } + return (BADCH); + } + + /* Does this option need an argument? 
*/ + if (oli[1] != ':') { + /* don't need argument */ + optarg = NULL; + if (*getopt_place == 0) ++optind; + } else { + /* Option-argument is either the rest of this argument or the + entire next argument. */ + if (*getopt_place) { + optarg = getopt_place; + } else if (nargc > ++optind) { + optarg = nargv[optind]; + } else { + /* option-argument absent */ + getopt_place = kGetoptEmsg; + if (*ostr == ':') return (BADARG); + if (opterr) + fprintf(stderr, "%s: option requires an argument -- %c\n", + program_invocation_name, optopt); + return (BADCH); + } + getopt_place = kGetoptEmsg; + ++optind; + } + return (optopt); /* return option letter */ } diff --git a/third_party/regex/regerror.c b/third_party/regex/regerror.c index c0c40630..385ed947 100644 --- a/third_party/regex/regerror.c +++ b/third_party/regex/regerror.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/fmt.h" #include "libc/str/str.h" #include "third_party/regex/regex.h" diff --git a/third_party/regex/regexec.c b/third_party/regex/regexec.c index f22b8fd4..3c33eb33 100644 --- a/third_party/regex/regexec.c +++ b/third_party/regex/regexec.c @@ -875,7 +875,7 @@ static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, * @return 0 or REG_NOMATCH */ int regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[nmatch], int eflags) { + regmatch_t *pmatch, int eflags) { tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; reg_errcode_t status; regoff_t *tags = NULL, eo; diff --git a/third_party/third_party.mk b/third_party/third_party.mk index 72dd9616..e6f5f2a9 100644 --- a/third_party/third_party.mk +++ b/third_party/third_party.mk @@ -3,7 +3,6 @@ .PHONY: o/$(MODE)/third_party o/$(MODE)/third_party: \ - o/$(MODE)/third_party/avir \ 
o/$(MODE)/third_party/blas \ o/$(MODE)/third_party/chibicc \ o/$(MODE)/third_party/compiler_rt \ diff --git a/tool/build/blinkenlights.c b/tool/build/blinkenlights.c index fb853141..649e01ca 100644 --- a/tool/build/blinkenlights.c +++ b/tool/build/blinkenlights.c @@ -22,7 +22,7 @@ #include "libc/alg/arraylist2.internal.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/ioctl.h" #include "libc/calls/struct/iovec.h" diff --git a/tool/build/helpop.c b/tool/build/helpop.c index bcbe2c51..d60f4a78 100644 --- a/tool/build/helpop.c +++ b/tool/build/helpop.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/conv.h" #include "libc/macros.h" #include "libc/runtime/runtime.h" diff --git a/tool/build/lib/asmdown.c b/tool/build/lib/asmdown.c new file mode 100644 index 00000000..b15f6d44 --- /dev/null +++ b/tool/build/lib/asmdown.c @@ -0,0 +1,169 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. 
│ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/alg/alg.h" +#include "libc/mem/mem.h" +#include "libc/str/str.h" +#include "tool/build/lib/asmdown.h" +#include "tool/build/lib/javadown.h" + +static bool IsSymbolChar1(char c) { + return (c & 0x80) || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || + c == '$' || c == '_'; +} + +static bool IsSymbolChar2(char c) { + return (c & 0x80) || ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z') || c == '$' || c == '_'; +} + +static bool IsSymbolString(const char *s) { + int i; + if (!IsSymbolChar1(*s++)) return false; + while (*s) { + if (!IsSymbolChar2(*s++)) return false; + } + return true; +} + +/** + * Extracts symbols and docstrings from .S file. + * + * Docstrings are multiline Fortran-style AT&T assembly comments + * preceding a symbol that have at least one @tag line. 
+ * + * @param code is contents of .S file + * @param size is byte length of code + * @return object that needs to be FreeAsmdown()'d + * @note this function assumes canonical unix newlines + */ +struct Asmdown *ParseAsmdown(const char *code, size_t size) { + struct Asmdown *ad; + char *p1, *p2, *p3, *symbol, *alias; + enum { BOL, COM, SYM, OTHER } state; + int i, j, line, start_line, start_docstring, end_docstring, start_symbol; + ad = calloc(1, sizeof(struct Asmdown)); + line = 1; + start_line = 1; + state = BOL; + start_docstring = 0; + end_docstring = 0; + start_symbol = 0; + for (i = 0; i < size; ++i) { + switch (state) { + case BOL: + if (code[i] == '/') { + start_line = line; + start_docstring = i; + state = COM; + } else if (IsSymbolChar1(code[i])) { + start_symbol = i; + state = SYM; + } else if (code[i] == '\n') { + ++line; + } else if (i + 8 < size && !memcmp(code + i, "\t.alias\t", 8)) { + p1 = code + i + 8; + if ((p2 = strchr(p1, ',')) && (p3 = strchr(p2, '\n'))) { + symbol = strndup(p1, p2 - p1); + alias = strndup(p2 + 1, p3 - (p2 + 1)); + if (IsSymbolString(symbol) && IsSymbolString(alias)) { + for (j = 0; j < ad->symbols.n; ++j) { + if (!strcmp(ad->symbols.p[j].name, symbol)) { + ad->symbols.p = realloc( + ad->symbols.p, ++ad->symbols.n * sizeof(*ad->symbols.p)); + ad->symbols.p[ad->symbols.n - 1].line = ad->symbols.p[j].line; + ad->symbols.p[ad->symbols.n - 1].name = strdup(alias); + ad->symbols.p[ad->symbols.n - 1].is_alias = true; + ad->symbols.p[ad->symbols.n - 1].javadown = + ad->symbols.p[j].javadown; + break; + } + } + } + free(symbol); + free(alias); + } + state = OTHER; + } else { + state = OTHER; + } + break; + case COM: + if (code[i] == '\n') { + ++line; + if (i + 1 < size && code[i + 1] != '/') { + state = BOL; + end_docstring = i + 1; + if (!memmem(code + start_docstring, end_docstring - start_docstring, + "/\t@", 3)) { + start_docstring = 0; + end_docstring = 0; + } + } + } + break; + case SYM: + if (code[i] == ':' && end_docstring > 
start_docstring) { + ad->symbols.p = + realloc(ad->symbols.p, ++ad->symbols.n * sizeof(*ad->symbols.p)); + ad->symbols.p[ad->symbols.n - 1].line = start_line; + ad->symbols.p[ad->symbols.n - 1].name = + strndup(code + start_symbol, i - start_symbol); + ad->symbols.p[ad->symbols.n - 1].is_alias = false; + ad->symbols.p[ad->symbols.n - 1].javadown = ParseJavadown( + code + start_docstring, end_docstring - start_docstring); + end_docstring = 0; + start_docstring = 0; + state = OTHER; + } else if (code[i] == '\n') { + ++line; + state = BOL; + } else if (!IsSymbolChar2(code[i])) { + state = OTHER; + } + break; + case OTHER: + if (code[i] == '\n') { + ++line; + state = BOL; + } + break; + default: + unreachable; + } + } + return ad; +} + +/** + * Frees object returned by ParseAsmdown(). + */ +void FreeAsmdown(struct Asmdown *ad) { + int i; + if (ad) { + for (i = 0; i < ad->symbols.n; ++i) { + free(ad->symbols.p[i].name); + if (!ad->symbols.p[i].is_alias) { + FreeJavadown(ad->symbols.p[i].javadown); + } + } + free(ad->symbols.p); + free(ad); + } +} diff --git a/tool/build/lib/asmdown.h b/tool/build/lib/asmdown.h new file mode 100644 index 00000000..aa413b63 --- /dev/null +++ b/tool/build/lib/asmdown.h @@ -0,0 +1,24 @@ +#ifndef COSMOPOLITAN_TOOL_BUILD_LIB_ASMDOWN_H_ +#define COSMOPOLITAN_TOOL_BUILD_LIB_ASMDOWN_H_ +#include "tool/build/lib/javadown.h" +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +struct Asmdown { + struct AsmdownSymbols { + size_t n; + struct AsmdownSymbol { + int line; + char *name; + bool is_alias; + struct Javadown *javadown; + } * p; + } symbols; +}; + +struct Asmdown *ParseAsmdown(const char *, size_t); +void FreeAsmdown(struct Asmdown *); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_TOOL_BUILD_LIB_ASMDOWN_H_ */ diff --git a/tool/build/lib/demangle.c b/tool/build/lib/demangle.c index 5023e888..1ada7671 100644 --- a/tool/build/lib/demangle.c +++ b/tool/build/lib/demangle.c @@ -18,7 
+18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/calls/struct/iovec.h" diff --git a/tool/build/lib/dis.c b/tool/build/lib/dis.c index b042ce0e..5f52890a 100644 --- a/tool/build/lib/dis.c +++ b/tool/build/lib/dis.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" #include "libc/alg/arraylist2.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/bing.internal.h" #include "libc/fmt/fmt.h" #include "libc/fmt/itoa.h" diff --git a/tool/build/lib/interner.c b/tool/build/lib/interner.c index fd2580db..7d521a6f 100644 --- a/tool/build/lib/interner.c +++ b/tool/build/lib/interner.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/arraylist.internal.h" #include "libc/alg/arraylist2.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/crc32.h" #include "libc/str/str.h" diff --git a/tool/build/lib/javadown.c b/tool/build/lib/javadown.c index 0b9014b5..c8e6d141 100644 --- a/tool/build/lib/javadown.c +++ b/tool/build/lib/javadown.c @@ -66,7 +66,7 @@ static unsigned GetSpacePrefixLen(const char *p, size_t n) { static unsigned GetSpaceStarPrefixLen(const char *p, size_t n) { int i; i = GetSpacePrefixLen(p, n); - return i < n && (p[i] == '*') ? i + 1 : 0; + return i < n && (p[i] == '*' || p[i] == '/') ? 
i + 1 : 0; } static unsigned GetTagLen(const char *p, size_t n) { diff --git a/tool/build/lib/panel.c b/tool/build/lib/panel.c index c91f7912..dc04466a 100644 --- a/tool/build/lib/panel.c +++ b/tool/build/lib/panel.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/conv.h" #include "libc/mem/mem.h" #include "libc/str/str.h" diff --git a/tool/build/lib/pty.c b/tool/build/lib/pty.c index bc95632d..f7d7cc80 100644 --- a/tool/build/lib/pty.c +++ b/tool/build/lib/pty.c @@ -20,7 +20,7 @@ #include "libc/alg/arraylist2.internal.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/conv.h" #include "libc/fmt/itoa.h" #include "libc/log/check.h" diff --git a/tool/build/lib/xmmtype.c b/tool/build/lib/xmmtype.c index 4c64d206..37be3171 100644 --- a/tool/build/lib/xmmtype.c +++ b/tool/build/lib/xmmtype.c @@ -38,6 +38,14 @@ static void UpdateXmmSizes(struct Machine *m, struct XmmType *xt, int regsize, void UpdateXmmType(struct Machine *m, struct XmmType *xt) { switch (m->xedd->op.dispatch) { + case 0x110: + case 0x111: // MOVSS,MOVSD + if (Rep(m->xedd->op.rde) == 3) { + UpdateXmmTypes(m, xt, kXmmFloat, kXmmFloat); + } else if (Rep(m->xedd->op.rde) == 2) { + UpdateXmmTypes(m, xt, kXmmDouble, kXmmDouble); + } + break; case 0x12E: // UCOMIS case 0x12F: // COMIS case 0x151: // SQRT diff --git a/tool/build/lz4toasm.c b/tool/build/lz4toasm.c index aedce0be..0f3605d5 100644 --- a/tool/build/lz4toasm.c +++ b/tool/build/lz4toasm.c @@ -26,7 +26,7 @@ #include "libc/mem/mem.h" #include "libc/nexgen32e/kompressor.h" #include "libc/nexgen32e/lz4.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #include "libc/runtime/gc.h" #include 
"libc/runtime/runtime.h" #include "libc/stdio/stdio.h" diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c index 6a63ada6..0fcbc767 100644 --- a/tool/build/mkdeps.c +++ b/tool/build/mkdeps.c @@ -23,7 +23,7 @@ #include "libc/alg/bisectcarleft.internal.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/errno.h" @@ -32,7 +32,7 @@ #include "libc/log/log.h" #include "libc/macros.h" #include "libc/nexgen32e/crc32.h" -#include "libc/runtime/ezmap.h" +#include "libc/runtime/ezmap.internal.h" #include "libc/runtime/gc.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" diff --git a/tool/build/package.c b/tool/build/package.c index 8d1b99f8..a7cc54c4 100644 --- a/tool/build/package.c +++ b/tool/build/package.c @@ -23,7 +23,7 @@ #include "libc/alg/bisectcarleft.internal.h" #include "libc/assert.h" #include "libc/bits/bswap.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/elf/def.h" diff --git a/tool/build/refactor.c b/tool/build/refactor.c index 254d211a..349619a0 100644 --- a/tool/build/refactor.c +++ b/tool/build/refactor.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" #include "libc/assert.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/dirent.h" #include "libc/calls/struct/stat.h" diff --git a/tool/build/runit.c b/tool/build/runit.c index cc2b8cf8..67b98890 100644 --- a/tool/build/runit.c +++ b/tool/build/runit.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" 
+#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/calls/struct/itimerval.h" diff --git a/tool/build/runitd.c b/tool/build/runitd.c index 08b64530..6ebc160c 100644 --- a/tool/build/runitd.c +++ b/tool/build/runitd.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/calls/struct/sigaction.h" diff --git a/tool/build/zipobj.c b/tool/build/zipobj.c index c729d03f..1e745b1a 100644 --- a/tool/build/zipobj.c +++ b/tool/build/zipobj.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/arraylist.internal.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/calls/struct/timespec.h" diff --git a/tool/calc/calc.c b/tool/calc/calc.c index 4b1e44f2..ddcd6903 100644 --- a/tool/calc/calc.c +++ b/tool/calc/calc.c @@ -19,7 +19,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" #include "libc/bits/bswap.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/errno.h" #include "libc/fmt/conv.h" diff --git a/tool/decode/elf.c b/tool/decode/elf.c index 8f2b7dbf..0161e4c5 100644 --- a/tool/decode/elf.c +++ b/tool/decode/elf.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include 
"libc/calls/struct/stat.h" #include "libc/elf/elf.h" diff --git a/tool/decode/lib/asmcodegen.c b/tool/decode/lib/asmcodegen.c index 4c0e31f5..785fdc55 100644 --- a/tool/decode/lib/asmcodegen.c +++ b/tool/decode/lib/asmcodegen.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/fmt.h" #include "libc/mem/mem.h" #include "libc/runtime/gc.h" diff --git a/tool/decode/macho.c b/tool/decode/macho.c index 86053849..84aa68af 100644 --- a/tool/decode/macho.c +++ b/tool/decode/macho.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/fmt/conv.h" diff --git a/tool/decode/mkcombos.c b/tool/decode/mkcombos.c index 207263df..12b55bea 100644 --- a/tool/decode/mkcombos.c +++ b/tool/decode/mkcombos.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/fmt/conv.h" #include "libc/log/check.h" diff --git a/tool/decode/mkwides.c b/tool/decode/mkwides.c index c21cf4d6..1faedc67 100644 --- a/tool/decode/mkwides.c +++ b/tool/decode/mkwides.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/fmt.h" #include "libc/log/check.h" #include 
"libc/mem/mem.h" diff --git a/tool/decode/pe2.c b/tool/decode/pe2.c index 4ac98671..87b2f762 100644 --- a/tool/decode/pe2.c +++ b/tool/decode/pe2.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/fmt/conv.h" diff --git a/tool/decode/zip.c b/tool/decode/zip.c index 0e18a8aa..71bb37b6 100644 --- a/tool/decode/zip.c +++ b/tool/decode/zip.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/fmt/conv.h" diff --git a/tool/net/echoserver.c b/tool/net/echoserver.c index c3a1660f..dabcbb07 100644 --- a/tool/net/echoserver.c +++ b/tool/net/echoserver.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/arraylist.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/iovec.h" #include "libc/errno.h" diff --git a/tool/net/greenbean.c b/tool/net/greenbean.c index dcc97f23..76a96a80 100644 --- a/tool/net/greenbean.c +++ b/tool/net/greenbean.c @@ -20,7 +20,7 @@ #include "libc/alg/arraylist2.internal.h" #include "libc/bits/bits.h" #include "libc/bits/bswap.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/iovec.h" #include "libc/calls/struct/itimerval.h" diff --git a/tool/net/redbean.c b/tool/net/redbean.c index 2c539a7b..ef6d6651 100644 --- a/tool/net/redbean.c +++ 
b/tool/net/redbean.c @@ -20,7 +20,7 @@ #include "libc/alg/arraylist2.internal.h" #include "libc/bits/bits.h" #include "libc/bits/bswap.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/iovec.h" #include "libc/calls/struct/itimerval.h" diff --git a/tool/viz/basicidea.c b/tool/viz/basicidea.c index 2604fc21..b04bfa3a 100644 --- a/tool/viz/basicidea.c +++ b/tool/viz/basicidea.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/core/core.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" #include "libc/calls/ioctl.h" diff --git a/tool/viz/deathstar.c b/tool/viz/deathstar.c index a1fdf663..344052a2 100644 --- a/tool/viz/deathstar.c +++ b/tool/viz/deathstar.c @@ -1,5 +1,5 @@ #include "dsp/tty/tty.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/termios.h" #include "libc/log/check.h" diff --git a/tool/viz/derasterize.c b/tool/viz/derasterize.c index 1a9a39c6..6cd843e2 100644 --- a/tool/viz/derasterize.c +++ b/tool/viz/derasterize.c @@ -46,7 +46,6 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" #include "libc/x/x.h" -#include "third_party/avir/lanczos.h" #include "third_party/getopt/getopt.h" #include "third_party/stb/stb_image.h" #include "third_party/stb/stb_image_resize.h" diff --git a/tool/viz/fold.c b/tool/viz/fold.c index 1607a550..bc9ec707 100644 --- a/tool/viz/fold.c +++ b/tool/viz/fold.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/errno.h" #include 
"libc/fmt/conv.h" #include "libc/runtime/runtime.h" diff --git a/tool/viz/generatematrix.c b/tool/viz/generatematrix.c index bc0649a4..5b0fad25 100644 --- a/tool/viz/generatematrix.c +++ b/tool/viz/generatematrix.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/conv.h" #include "libc/fmt/fmt.h" #include "libc/limits.h" diff --git a/tool/viz/lib/formatstringtable-assembly.c b/tool/viz/lib/formatstringtable-assembly.c index 7e1c261e..91a4a992 100644 --- a/tool/viz/lib/formatstringtable-assembly.c +++ b/tool/viz/lib/formatstringtable-assembly.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/itoa.h" #include "libc/runtime/gc.h" #include "libc/str/str.h" diff --git a/tool/viz/lib/formatstringtable-code.c b/tool/viz/lib/formatstringtable-code.c index 9b71a670..39f4a555 100644 --- a/tool/viz/lib/formatstringtable-code.c +++ b/tool/viz/lib/formatstringtable-code.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/itoa.h" #include "tool/viz/lib/formatstringtable.h" diff --git a/tool/viz/lib/formatstringtable.c b/tool/viz/lib/formatstringtable.c index 17fcf6f1..064ae58d 100644 --- a/tool/viz/lib/formatstringtable.c +++ b/tool/viz/lib/formatstringtable.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ 
╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/unicode/unicode.h" #include "tool/viz/lib/formatstringtable.h" diff --git a/libc/str/knobs.c b/tool/viz/lib/knobs.c similarity index 100% rename from libc/str/knobs.c rename to tool/viz/lib/knobs.c diff --git a/tool/viz/lib/vizlib.mk b/tool/viz/lib/vizlib.mk index 9d435771..2b086d79 100644 --- a/tool/viz/lib/vizlib.mk +++ b/tool/viz/lib/vizlib.mk @@ -46,7 +46,6 @@ TOOL_VIZ_LIB_A_DIRECTDEPS = \ LIBC_TINYMATH \ LIBC_UNICODE \ LIBC_X \ - THIRD_PARTY_AVIR \ THIRD_PARTY_GDTOA \ THIRD_PARTY_DLMALLOC diff --git a/tool/viz/lib/ycbcr2rgb3.c b/tool/viz/lib/ycbcr2rgb3.c index 240320e6..70b9809f 100644 --- a/tool/viz/lib/ycbcr2rgb3.c +++ b/tool/viz/lib/ycbcr2rgb3.c @@ -46,7 +46,6 @@ #include "libc/sysv/errfuns.h" #include "libc/time/time.h" #include "libc/x/x.h" -#include "third_party/avir/lanczos.h" #include "tool/viz/lib/graphic.h" #include "tool/viz/lib/knobs.h" #include "tool/viz/lib/ycbcr.h" diff --git a/tool/viz/life.c b/tool/viz/life.c index 7c7659f2..a15157cc 100644 --- a/tool/viz/life.c +++ b/tool/viz/life.c @@ -21,7 +21,7 @@ #include "dsp/scale/scale.h" #include "libc/bits/bits.h" #include "libc/bits/popcnt.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/bits/xchg.h" #include "libc/calls/calls.h" #include "libc/calls/ioctl.h" diff --git a/tool/viz/magikarp.c b/tool/viz/magikarp.c index 4591fa8c..ba6076e6 100644 --- a/tool/viz/magikarp.c +++ b/tool/viz/magikarp.c @@ -41,8 +41,6 @@ #include "libc/sysv/consts/prot.h" #include "libc/testlib/testlib.h" #include "libc/x/x.h" -#include "third_party/avir/lanczos1b.h" -#include "third_party/avir/lanczos1f.h" #include "third_party/gdtoa/gdtoa.h" #include "third_party/getopt/getopt.h" #include "third_party/stb/stb_image.h" @@ -590,34 +588,6 @@ void ProcessImageMagikarp(unsigned yn, unsigned xn, yn, xn, 
lround(r_)); } -void *ProcessImageLanczosImpl(unsigned dyn, unsigned dxn, - float dst[4][dyn][dxn], unsigned syn, - unsigned sxn, float src[4][syn][sxn]) { - unsigned k; - struct lanczos1f scaler = {0}; - lanczos1finit(&scaler); - for (k = 0; k < 4; ++k) { - lanczos1f(&scaler, dyn, dxn, dst[k], syn, sxn, sxn, src[k], r_, r_, 0, 0); - } - lanczos1ffree(&scaler); - return dst; -} - -void ProcessImageLanczos(unsigned yn, unsigned xn, - unsigned char img[yn][xn][4]) { - unsigned dyn, dxn; - dyn = lround(yn / r_); - dxn = lround(xn / r_); - PrintImage2( - dyn, dxn, - f2b(dyn * dxn * 4, gc(xmalloc(dyn * dxn * 4)), - ProcessImageLanczosImpl( - dyn, dxn, gc(xmalloc(dyn * dxn * 4 * 4)), yn, xn, - b2f(yn * xn * 4, gc(xmalloc(yn * xn * 4 * 4)), - DeblinterlaceRgba2(yn, xn, gc(xmalloc(yn * xn * 4)), img)))), - dyn, dxn); -} - noinline void WithImageFile(const char *path, void fn(unsigned yn, unsigned xn, unsigned char img[yn][xn][4])) { @@ -658,9 +628,6 @@ int main(int argc, char *argv[]) { case 'S': scaler = ProcessImageGyarados; break; - case 'l': - scaler = ProcessImageLanczos; - break; case 'b': scaler = ProcessImageBilinear; break; diff --git a/tool/viz/memzoom.c b/tool/viz/memzoom.c index 11b92cce..b087f007 100644 --- a/tool/viz/memzoom.c +++ b/tool/viz/memzoom.c @@ -21,7 +21,7 @@ #include "libc/bits/bits.h" #include "libc/bits/hilbert.h" #include "libc/bits/morton.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/ioctl.h" #include "libc/calls/struct/sigaction.h" diff --git a/tool/viz/printimage.c b/tool/viz/printimage.c index ebbd01b6..4c700832 100644 --- a/tool/viz/printimage.c +++ b/tool/viz/printimage.c @@ -24,7 +24,7 @@ #include "dsp/tty/tty.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/ioctl.h" #include "libc/calls/struct/stat.h" diff --git 
a/tool/viz/printpeb.c b/tool/viz/printpeb.c index bd8bfec9..da2609d4 100644 --- a/tool/viz/printpeb.c +++ b/tool/viz/printpeb.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/dce.h" diff --git a/tool/viz/printvideo.c b/tool/viz/printvideo.c index 2e46915b..d1ef669f 100644 --- a/tool/viz/printvideo.c +++ b/tool/viz/printvideo.c @@ -28,7 +28,7 @@ #include "libc/alg/arraylist.internal.h" #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/bits/xchg.h" #include "libc/calls/calls.h" #include "libc/calls/hefty/spawn.h" @@ -94,7 +94,6 @@ #include "libc/time/time.h" #include "libc/unicode/unicode.h" #include "libc/x/x.h" -#include "third_party/avir/lanczos.h" #include "third_party/getopt/getopt.h" #include "third_party/stb/stb_image_resize.h" #include "tool/viz/lib/graphic.h" diff --git a/tool/viz/resize.c b/tool/viz/resize.c index 90fecf08..586aec74 100644 --- a/tool/viz/resize.c +++ b/tool/viz/resize.c @@ -32,7 +32,6 @@ #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" -#include "third_party/avir/resize.h" #include "third_party/zlib/zlib.h" void ProcessFile(const char *path) { diff --git a/tool/viz/tabalign.c b/tool/viz/tabalign.c index 68cb0983..b9fd56c5 100644 --- a/tool/viz/tabalign.c +++ b/tool/viz/tabalign.c @@ -18,7 +18,7 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/arraylist.internal.h" -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/fmt/conv.h" #include "libc/limits.h" #include "libc/log/check.h" diff --git 
a/tool/viz/tailf.c b/tool/viz/tailf.c index f0fe3f77..e9a663fc 100644 --- a/tool/viz/tailf.c +++ b/tool/viz/tailf.c @@ -17,7 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/safemacros.internal.h" +#include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/macros.h" diff --git a/tool/viz/viz.mk b/tool/viz/viz.mk index 7093df7e..de4dc681 100644 --- a/tool/viz/viz.mk +++ b/tool/viz/viz.mk @@ -48,7 +48,6 @@ TOOL_VIZ_DIRECTDEPS = \ LIBC_UNICODE \ LIBC_X \ NET_HTTP \ - THIRD_PARTY_AVIR \ THIRD_PARTY_DLMALLOC \ THIRD_PARTY_GDTOA \ THIRD_PARTY_GETOPT \