cio

a simple irc client
Download | Log | Files | Refs | README | LICENSE

commit 08a5e9d6448773b5dff7eb91ef3bf776ef0ca5f9
parent 5f0c7ab87b3f809b8f4639eb5b6d64ca322a9897
Author: Andrew Kloet <andrew@kloet.net>
Date:   Wed, 29 Apr 2026 11:59:37 -0400

use native ncursesw UTF-8

With this commit we now support UTF-8 input and no longer rely on the
manual UTF-8 decoding implementation. We were already linking against
ncursesw, so there are not really any compromises there.

UTF-8 is obviously significantly more cumbersome to implement correctly
than ASCII, and I would not bet my last dollar that no mistakes were
made. Fingers crossed!

Diffstat:
Mcio.1 | 4++--
Mcio.c | 272++++++++++++++++++++++++++++++++++++-------------------------------------------
Mconfig.mk | 2+-
3 files changed, 127 insertions(+), 151 deletions(-)

diff --git a/cio.1 b/cio.1 @@ -1,4 +1,4 @@ -.Dd April 2, 2026 +.Dd April 29, 2026 .Dt CIO 1 .Os .Sh NAME @@ -17,7 +17,7 @@ . .Sh DESCRIPTION .Nm -is a multiplexing curses interface for IRC. +is a multiplexing curses interface for IRC featuring TLS, UTF-8, and scrollback. .Nm does not aim for complete coverage of rfc2812, but rather focuses on taking design choices with the aim of creating hackable code that is easily extendable. diff --git a/cio.c b/cio.c @@ -9,8 +9,7 @@ * PRIVMSG. IRC protocol parsing is handled via a string tokenizer, dispatching * commands through a lookup table in scmd(). * - * UI rendering is handled by ncurses. To support UTF-8, the client manually - * decodes runes before passing them to the wide-character add_wch function. + * UI rendering is handled by ncurses and UTF-8 is supported by ncursesw. * * To understand the lifecycle start reading main(). */ @@ -25,6 +24,8 @@ #include <stdlib.h> #include <string.h> #include <time.h> +#include <wchar.h> +#include <wctype.h> #include <arpa/inet.h> #include <netdb.h> @@ -42,10 +43,12 @@ #undef CTRL #define CTRL(x) (x & 037) +#define IS_CONT(b) (((unsigned char)(b) & 0xC0) == 0x80) #define GET_ARG(i) ((argc > (i)) ? argv[i] : "") #define SCROLL 15 #define INDENT 23 +#define TABSTOP 8 #define DATEFMT "%H:%M" #define PFMT " %-12s < %s" #define PFMTHIGH "> %-12s < %s" @@ -85,8 +88,6 @@ enum { RuneInvalid = 0xFFFD, }; -typedef wchar_t Rune; - static struct { size_t x, y; WINDOW *sw, *mw, *iw; @@ -125,11 +126,6 @@ static int nch, ch; /* Current number of channels, and current channel. */ static char outb[BufSz], *outp = outb; /* Output buffer. 
*/ static FILE *logfp; -static const unsigned char utfbyte[UtfSz + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0}; -static const unsigned char utfmask[UtfSz + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8}; -static const Rune utfmin[UtfSz + 1] = {0, 0, 0x80, 0x800, 0x10000}; -static const Rune utfmax[UtfSz + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; - static void scmd(char *, char *, int, char **); static void tdrawbar(void); static void tdrawinput(void); @@ -151,49 +147,6 @@ die(const char *fmt, ...) exit(1); } -static size_t -utf8validate(Rune *u, size_t i) -{ - if (*u < utfmin[i] || *u > utfmax[i] || (0xD800 <= *u && *u <= 0xDFFF)) - *u = RuneInvalid; - for (i = 1; *u > utfmax[i]; ++i) - ; - return i; -} - -static Rune -utf8decodebyte(unsigned char c, size_t *i) -{ - for (*i = 0; *i < UtfSz + 1; ++(*i)) - if ((c & utfmask[*i]) == utfbyte[*i]) - return c & ~utfmask[*i]; - return 0; -} - -static size_t -utf8decode(const char *c, Rune *u, size_t clen) -{ - size_t i, j, len, type; - Rune udecoded; - - *u = RuneInvalid; - if (!clen) - return 0; - udecoded = utf8decodebyte(c[0], &len); - if (len < 1 || len > UtfSz) - return 1; - for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { - udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); - if (type != 0) - return j; - } - if (j < len) - return 0; - *u = udecoded; - utf8validate(u, len); - return len; -} - static int empty(const char *str) { return (str == NULL || str[0] == '\0'); @@ -412,39 +365,54 @@ chdel(const char *name) static char * pushl(char *p, char *e) { - size_t x; - char *w; - Rune u[2]; + size_t x = 0; + wchar_t wc; + int n, cl; cchar_t cc; - - u[1] = 0; - if ((w = memchr(p, '\n', e - p))) - e = w + 1; - w = p; - x = 0; - for (;;) { - if (x >= scr.x) { + char *eol = memchr(p, '\n', e - p); + + if (!eol) eol = e; + mbtowc(NULL, NULL, 0); + while (p < eol) { + char *word_end = p; + int word_width = 0; + while (word_end < eol) { + wchar_t wwc; + int wn = mbtowc(&wwc, word_end, eol - word_end); + if (wn <= 0 || iswspace(wwc)) 
break; + int wcl = wcwidth(wwc); + if (wcl > 0) word_width += wcl; + word_end += wn; + } + if (x + word_width >= scr.x && word_width < (scr.x - INDENT)) { waddch(scr.mw, '\n'); - for (x = 0; x < INDENT; x++) - waddch(scr.mw, ' '); - if (*w == ' ') - w++; - x += p - w; + for (x = 0; x < INDENT; x++) waddch(scr.mw, ' '); + while (p < eol && iswspace(*p)) p++; } - if (p >= e || *p == ' ' || p-w+INDENT >= (ptrdiff_t)scr.x-1) { - while (w < p) { - w += utf8decode(w, u, UtfSz); - if (wcwidth(*u) > 0 || *u == '\n') { - setcchar(&cc, u, 0, 0, 0); - wadd_wch(scr.mw, &cc); - } + while (p < eol) { + if ((n = mbtowc(&wc, p, eol - p)) <= 0) { + mbtowc(NULL, NULL, 0); + wc = L'?'; n = 1; + } + if (iswcntrl(wc)) { + p += n; + continue; + } + cl = wcwidth(wc); + if (cl < 0) cl = 0; + if (x + cl >= (size_t)scr.x) { + waddch(scr.mw, '\n'); + for (x = 0; x < INDENT; x++) + waddch(scr.mw, ' '); } - if (p >= e) return e; + setcchar(&cc, &wc, 0, 0, 0); + wadd_wch(scr.mw, &cc); + x += cl; + p += n; + if (iswspace(wc)) break; } - p += utf8decode(p, u, UtfSz); - int cl = wcwidth(*u); - if (cl >= 0) x += cl; } + return (eol < e) ? 
eol + 1 : e; } static void @@ -753,38 +721,24 @@ static void tredraw(void) { struct Chan *const c = &chl[ch]; - char *q, *p; - int row_idx = -1; + char *p = c->eol, *start = c->buf; + int msg_count = 0; - if (c->eol == c->buf) { - wclear(scr.mw); - wnoutrefresh(scr.mw); - return; - } - p = c->eol - 1; - if (c->n) { - int i = c->n; - for (; p > c->buf; p--) - if (*p == '\n' && !i--) - break; - if (p == c->buf) - c->n -= i; + wclear(scr.mw); + if (c->eol == c->buf) return; + while (p > c->buf && msg_count < (scr.y - 2 + c->n)) { + char *s = p - 1; + while (s > c->buf && *(s - 1) != '\n') s--; + if (msg_count >= c->n) start = s; + p = s - 1; + msg_count++; } - q = p; - while (row_idx < (int)scr.y - 2) { - while (*q != '\n' && q > c->buf) - q--; - row_idx++; - if (q == c->buf) - break; - q--; + p = start; + for (int i = 0; p < c->eol && i < (scr.y - 2); i++) { + char *next = pushl(p, c->eol); + if (next < c->eol) waddch(scr.mw, '\n'); + p = next; } - if (q != c->buf) - q += 2; - wclear(scr.mw); - wmove(scr.mw, 0, 0); - while (q < p) - q = pushl(q, p); wnoutrefresh(scr.mw); } @@ -819,43 +773,54 @@ tdrawbar(void) static void tdrawinput(void) { - int hw = scr.x / 2; - - while (inp.cu < inp.shft) - inp.shft -= (inp.shft > hw) ? hw : inp.shft; - while (inp.cu >= inp.shft + scr.x) - inp.shft += hw; - + int v_cu = 0; /* visual cursor position */ + size_t b_shft = 0; + int v_curr = 0; + wchar_t wc; + int n, cl; + + mbtowc(NULL, NULL, 0); + for (size_t i = 0; i < inp.len; i += n) { + n = mbtowc(&wc, inp.buf + i, inp.len - i); + if (n <= 0) { n = 1; cl = 1; } + else { cl = (cl = wcwidth(wc)) < 0 ? 0 : cl; } + if (i < inp.cu) v_cu += cl; + if (v_curr < inp.shft) { + v_curr += cl; + b_shft = i + n; + } + } + if (v_cu < inp.shft) + inp.shft = (v_cu > scr.x / 2) ? 
v_cu - scr.x / 2 : 0; + else if (v_cu >= inp.shft + scr.x) + inp.shft = v_cu - scr.x / 2; wmove(scr.iw, 0, 0); - for (size_t i = inp.shft; i < inp.len && i < inp.shft + scr.x; i++) - waddch(scr.iw, inp.buf[i]); - + waddnstr(scr.iw, inp.buf + b_shft, inp.len - b_shft); wclrtoeol(scr.iw); - wmove(scr.iw, 0, inp.cu - inp.shft); + wmove(scr.iw, 0, v_cu - (int)inp.shft); wnoutrefresh(scr.iw); } static void tgetch(void) { + wint_t wc; + int res = wget_wch(scr.iw, &wc); char *p = &inp.buf[inp.cu]; /* Current cursor position */ size_t tail = inp.len - inp.cu; /* Count of chars after cursor */ - size_t i; - int c = wgetch(scr.iw); + size_t i, old; - switch (c) { + switch (wc) { case CTRL('n'): - case CTRL('p'): { - int d = (c == CTRL('n')) ? 1 : -1; - ch = (ch + d + nch) % nch; + case CTRL('p'): + ch = (ch + (wc == CTRL('n') ? 1 : -1) + nch) % nch; chl[ch].high = chl[ch].new = 0; tdrawbar(); tredraw(); return; - } case KEY_PPAGE: case KEY_NPAGE: - chl[ch].n += (c == KEY_PPAGE) ? SCROLL : -SCROLL; + chl[ch].n += (wc == KEY_PPAGE) ? 
SCROLL : -SCROLL; if (chl[ch].n < 0) chl[ch].n = 0; tredraw(); @@ -868,54 +833,65 @@ tgetch(void) break; case CTRL('b'): case KEY_LEFT: - if (inp.cu) - inp.cu--; + if (inp.cu <= 0) return; + do { inp.cu--; } + while (inp.cu > 0 && IS_CONT(inp.buf[inp.cu])); break; case CTRL('f'): case KEY_RIGHT: - if (inp.cu < inp.len) - inp.cu++; + if (inp.cu >= inp.len) return; + do { inp.cu++; } + while (inp.cu < inp.len && IS_CONT(inp.buf[inp.cu])); break; case CTRL('k'): inp.len = inp.cu; break; case CTRL('u'): if (inp.cu == 0) return; - memmove(inp.buf, p, tail); /* Move the tail to the beginning */ + memmove(inp.buf, p, tail); inp.len = tail; inp.cu = 0; break; case CTRL('d'): if (inp.cu >= inp.len) return; - memmove(p, p + 1, tail - 1); /* Shift tail left by 1 at p */ - inp.len--; + i = 1; + while (inp.cu + i < inp.len && IS_CONT(inp.buf[inp.cu + i])) + i++; + memmove(p, p + i, tail - i); + inp.len -= i; break; case CTRL('h'): case KEY_BACKSPACE: if (inp.cu == 0) return; - memmove(p - 1, p, tail); /* Shift tail left by 1 at p-1 */ - inp.cu--, inp.len--; + old = inp.cu; + do { inp.cu--; } while (inp.cu > 0 && IS_CONT(inp.buf[inp.cu])); + memmove(&inp.buf[inp.cu], &inp.buf[old], inp.len - old); + inp.len -= (old - inp.cu); break; case CTRL('w'): if (inp.cu == 0) break; - i = 1; - /* Find the start of the word (skipping trailing spaces) */ - while (i < inp.cu && inp.buf[inp.cu - i] == ' ') i++; - while (i < inp.cu && inp.buf[inp.cu - (i + 1)] != ' ') i++; - memmove(p - i, p, tail); /* Shift tail left by 'i' positions */ - inp.cu -= i, inp.len -= i; + old = inp.cu; + while (inp.cu > 0 && inp.buf[inp.cu - 1] == ' ') inp.cu--; + while (inp.cu > 0 && inp.buf[inp.cu - 1] != ' ') { + inp.cu--; + while (inp.cu > 0 && IS_CONT(inp.buf[inp.cu])) inp.cu--; + } + memmove(&inp.buf[inp.cu], &inp.buf[old], inp.len - old); + inp.len -= (old - inp.cu); break; case '\n': inp.buf[inp.len] = 0; uparse(inp.buf); - inp.cu = inp.len = 0; + inp.cu = inp.len = inp.shft = 0; break; default: - if (c > 
CHAR_MAX || inp.len >= BufSz - 1) - return; - memmove(p + 1, p, tail); - inp.buf[inp.cu++] = c; - inp.len++; + if (res == KEY_CODE_YES || (iswcntrl(wc))) return; + char mb[MB_LEN_MAX]; + int n = wctomb(mb, wc); + if (n <= 0 || inp.len + n >= BufSz - 1) return; + memmove(p + n, p, tail); + memcpy(p, mb, n); + inp.cu += n; inp.len += n; break; } tdrawinput(); diff --git a/config.mk b/config.mk @@ -12,7 +12,7 @@ NCURSESINC = $(shell pkg-config --cflags-only-I ncursesw) NCURSESLIB = $(shell pkg-config --libs ncursesw) # OpenBSD (uncomment) #NCURSESINC = -#NCURSESLIB = -lncurses +#NCURSESLIB = -lncursesw # includes and libs INCS = ${NCURSESINC}