commit 08a5e9d6448773b5dff7eb91ef3bf776ef0ca5f9
parent 5f0c7ab87b3f809b8f4639eb5b6d64ca322a9897
Author: Andrew Kloet <andrew@kloet.net>
Date: Wed, 29 Apr 2026 11:59:37 -0400
use native ncursesw UTF-8
With this commit we now support UTF-8 input and no longer rely on the
manual UTF-8 decoding implementation. We were already linking with
ncursesw, so there are not really any compromises there.
UTF-8 is obviously significantly more cumbersome to implement correctly
than ASCII, and I would not bet my last dollar that no mistakes were
made. Fingers crossed!
Diffstat:
| M | cio.1 | | | 4 | ++-- |
| M | cio.c | | | 272 | ++++++++++++++++++++++++++++++++++++------------------------------------------- |
| M | config.mk | | | 2 | +- |
3 files changed, 127 insertions(+), 151 deletions(-)
diff --git a/cio.1 b/cio.1
@@ -1,4 +1,4 @@
-.Dd April 2, 2026
+.Dd April 29, 2026
.Dt CIO 1
.Os
.Sh NAME
@@ -17,7 +17,7 @@
.
.Sh DESCRIPTION
.Nm
-is a multiplexing curses interface for IRC.
+is a multiplexing curses interface for IRC featuring TLS, UTF-8, and scrollback.
.Nm
does not aim for complete coverage of rfc2812, but rather focuses on taking
design choices with the aim of creating hackable code that is easily extendable.
diff --git a/cio.c b/cio.c
@@ -9,8 +9,7 @@
* PRIVMSG. IRC protocol parsing is handled via a string tokenizer, dispatching
* commands through a lookup table in scmd().
*
- * UI rendering is handled by ncurses. To support UTF-8, the client manually
- * decodes runes before passing them to the wide-character add_wch function.
+ * UI rendering is handled by ncurses and UTF-8 is supported by ncursesw.
*
* To understand the lifecycle start reading main().
*/
@@ -25,6 +24,8 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
+#include <wchar.h>
+#include <wctype.h>
#include <arpa/inet.h>
#include <netdb.h>
@@ -42,10 +43,12 @@
#undef CTRL
#define CTRL(x) (x & 037)
+#define IS_CONT(b) (((unsigned char)(b) & 0xC0) == 0x80)
#define GET_ARG(i) ((argc > (i)) ? argv[i] : "")
#define SCROLL 15
#define INDENT 23
+#define TABSTOP 8
#define DATEFMT "%H:%M"
#define PFMT " %-12s < %s"
#define PFMTHIGH "> %-12s < %s"
@@ -85,8 +88,6 @@ enum {
RuneInvalid = 0xFFFD,
};
-typedef wchar_t Rune;
-
static struct {
size_t x, y;
WINDOW *sw, *mw, *iw;
@@ -125,11 +126,6 @@ static int nch, ch; /* Current number of channels, and current channel. */
static char outb[BufSz], *outp = outb; /* Output buffer. */
static FILE *logfp;
-static const unsigned char utfbyte[UtfSz + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0};
-static const unsigned char utfmask[UtfSz + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
-static const Rune utfmin[UtfSz + 1] = {0, 0, 0x80, 0x800, 0x10000};
-static const Rune utfmax[UtfSz + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
-
static void scmd(char *, char *, int, char **);
static void tdrawbar(void);
static void tdrawinput(void);
@@ -151,49 +147,6 @@ die(const char *fmt, ...)
exit(1);
}
-static size_t
-utf8validate(Rune *u, size_t i)
-{
- if (*u < utfmin[i] || *u > utfmax[i] || (0xD800 <= *u && *u <= 0xDFFF))
- *u = RuneInvalid;
- for (i = 1; *u > utfmax[i]; ++i)
- ;
- return i;
-}
-
-static Rune
-utf8decodebyte(unsigned char c, size_t *i)
-{
- for (*i = 0; *i < UtfSz + 1; ++(*i))
- if ((c & utfmask[*i]) == utfbyte[*i])
- return c & ~utfmask[*i];
- return 0;
-}
-
-static size_t
-utf8decode(const char *c, Rune *u, size_t clen)
-{
- size_t i, j, len, type;
- Rune udecoded;
-
- *u = RuneInvalid;
- if (!clen)
- return 0;
- udecoded = utf8decodebyte(c[0], &len);
- if (len < 1 || len > UtfSz)
- return 1;
- for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
- udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
- if (type != 0)
- return j;
- }
- if (j < len)
- return 0;
- *u = udecoded;
- utf8validate(u, len);
- return len;
-}
-
static int
empty(const char *str) {
return (str == NULL || str[0] == '\0');
@@ -412,39 +365,54 @@ chdel(const char *name)
static char *
pushl(char *p, char *e)
{
- size_t x;
- char *w;
- Rune u[2];
+ size_t x = 0;
+ wchar_t wc;
+ int n, cl;
cchar_t cc;
-
- u[1] = 0;
- if ((w = memchr(p, '\n', e - p)))
- e = w + 1;
- w = p;
- x = 0;
- for (;;) {
- if (x >= scr.x) {
+ char *eol = memchr(p, '\n', e - p);
+
+ if (!eol) eol = e;
+ mbtowc(NULL, NULL, 0);
+ while (p < eol) {
+ char *word_end = p;
+ int word_width = 0;
+ while (word_end < eol) {
+ wchar_t wwc;
+ int wn = mbtowc(&wwc, word_end, eol - word_end);
+ if (wn <= 0 || iswspace(wwc)) break;
+ int wcl = wcwidth(wwc);
+ if (wcl > 0) word_width += wcl;
+ word_end += wn;
+ }
+ if (x + word_width >= scr.x && word_width < (scr.x - INDENT)) {
waddch(scr.mw, '\n');
- for (x = 0; x < INDENT; x++)
- waddch(scr.mw, ' ');
- if (*w == ' ')
- w++;
- x += p - w;
+ for (x = 0; x < INDENT; x++) waddch(scr.mw, ' ');
+ while (p < eol && iswspace(*p)) p++;
}
- if (p >= e || *p == ' ' || p-w+INDENT >= (ptrdiff_t)scr.x-1) {
- while (w < p) {
- w += utf8decode(w, u, UtfSz);
- if (wcwidth(*u) > 0 || *u == '\n') {
- setcchar(&cc, u, 0, 0, 0);
- wadd_wch(scr.mw, &cc);
- }
+ while (p < eol) {
+ if ((n = mbtowc(&wc, p, eol - p)) <= 0) {
+ mbtowc(NULL, NULL, 0);
+ wc = L'?'; n = 1;
+ }
+ if (iswcntrl(wc)) {
+ p += n;
+ continue;
+ }
+ cl = wcwidth(wc);
+ if (cl < 0) cl = 0;
+ if (x + cl >= (size_t)scr.x) {
+ waddch(scr.mw, '\n');
+ for (x = 0; x < INDENT; x++)
+ waddch(scr.mw, ' ');
}
- if (p >= e) return e;
+ setcchar(&cc, &wc, 0, 0, 0);
+ wadd_wch(scr.mw, &cc);
+ x += cl;
+ p += n;
+ if (iswspace(wc)) break;
}
- p += utf8decode(p, u, UtfSz);
- int cl = wcwidth(*u);
- if (cl >= 0) x += cl;
}
+ return (eol < e) ? eol + 1 : e;
}
static void
@@ -753,38 +721,24 @@ static void
tredraw(void)
{
struct Chan *const c = &chl[ch];
- char *q, *p;
- int row_idx = -1;
+ char *p = c->eol, *start = c->buf;
+ int msg_count = 0;
- if (c->eol == c->buf) {
- wclear(scr.mw);
- wnoutrefresh(scr.mw);
- return;
- }
- p = c->eol - 1;
- if (c->n) {
- int i = c->n;
- for (; p > c->buf; p--)
- if (*p == '\n' && !i--)
- break;
- if (p == c->buf)
- c->n -= i;
+ wclear(scr.mw);
+ if (c->eol == c->buf) return;
+ while (p > c->buf && msg_count < (scr.y - 2 + c->n)) {
+ char *s = p - 1;
+ while (s > c->buf && *(s - 1) != '\n') s--;
+ if (msg_count >= c->n) start = s;
+ p = s - 1;
+ msg_count++;
}
- q = p;
- while (row_idx < (int)scr.y - 2) {
- while (*q != '\n' && q > c->buf)
- q--;
- row_idx++;
- if (q == c->buf)
- break;
- q--;
+ p = start;
+ for (int i = 0; p < c->eol && i < (scr.y - 2); i++) {
+ char *next = pushl(p, c->eol);
+ if (next < c->eol) waddch(scr.mw, '\n');
+ p = next;
}
- if (q != c->buf)
- q += 2;
- wclear(scr.mw);
- wmove(scr.mw, 0, 0);
- while (q < p)
- q = pushl(q, p);
wnoutrefresh(scr.mw);
}
@@ -819,43 +773,54 @@ tdrawbar(void)
static void
tdrawinput(void)
{
- int hw = scr.x / 2;
-
- while (inp.cu < inp.shft)
- inp.shft -= (inp.shft > hw) ? hw : inp.shft;
- while (inp.cu >= inp.shft + scr.x)
- inp.shft += hw;
-
+ int v_cu = 0; /* visual cursor position */
+ size_t b_shft = 0;
+ int v_curr = 0;
+ wchar_t wc;
+ int n, cl;
+
+ mbtowc(NULL, NULL, 0);
+ for (size_t i = 0; i < inp.len; i += n) {
+ n = mbtowc(&wc, inp.buf + i, inp.len - i);
+ if (n <= 0) { n = 1; cl = 1; }
+ else { cl = (cl = wcwidth(wc)) < 0 ? 0 : cl; }
+ if (i < inp.cu) v_cu += cl;
+ if (v_curr < inp.shft) {
+ v_curr += cl;
+ b_shft = i + n;
+ }
+ }
+ if (v_cu < inp.shft)
+ inp.shft = (v_cu > scr.x / 2) ? v_cu - scr.x / 2 : 0;
+ else if (v_cu >= inp.shft + scr.x)
+ inp.shft = v_cu - scr.x / 2;
wmove(scr.iw, 0, 0);
- for (size_t i = inp.shft; i < inp.len && i < inp.shft + scr.x; i++)
- waddch(scr.iw, inp.buf[i]);
-
+ waddnstr(scr.iw, inp.buf + b_shft, inp.len - b_shft);
wclrtoeol(scr.iw);
- wmove(scr.iw, 0, inp.cu - inp.shft);
+ wmove(scr.iw, 0, v_cu - (int)inp.shft);
wnoutrefresh(scr.iw);
}
static void
tgetch(void)
{
+ wint_t wc;
+ int res = wget_wch(scr.iw, &wc);
char *p = &inp.buf[inp.cu]; /* Current cursor position */
size_t tail = inp.len - inp.cu; /* Count of chars after cursor */
- size_t i;
- int c = wgetch(scr.iw);
+ size_t i, old;
- switch (c) {
+ switch (wc) {
case CTRL('n'):
- case CTRL('p'): {
- int d = (c == CTRL('n')) ? 1 : -1;
- ch = (ch + d + nch) % nch;
+ case CTRL('p'):
+ ch = (ch + (wc == CTRL('n') ? 1 : -1) + nch) % nch;
chl[ch].high = chl[ch].new = 0;
tdrawbar();
tredraw();
return;
- }
case KEY_PPAGE:
case KEY_NPAGE:
- chl[ch].n += (c == KEY_PPAGE) ? SCROLL : -SCROLL;
+ chl[ch].n += (wc == KEY_PPAGE) ? SCROLL : -SCROLL;
if (chl[ch].n < 0)
chl[ch].n = 0;
tredraw();
@@ -868,54 +833,65 @@ tgetch(void)
break;
case CTRL('b'):
case KEY_LEFT:
- if (inp.cu)
- inp.cu--;
+ if (inp.cu <= 0) return;
+ do { inp.cu--; }
+ while (inp.cu > 0 && IS_CONT(inp.buf[inp.cu]));
break;
case CTRL('f'):
case KEY_RIGHT:
- if (inp.cu < inp.len)
- inp.cu++;
+ if (inp.cu >= inp.len) return;
+ do { inp.cu++; }
+ while (inp.cu < inp.len && IS_CONT(inp.buf[inp.cu]));
break;
case CTRL('k'):
inp.len = inp.cu;
break;
case CTRL('u'):
if (inp.cu == 0) return;
- memmove(inp.buf, p, tail); /* Move the tail to the beginning */
+ memmove(inp.buf, p, tail);
inp.len = tail;
inp.cu = 0;
break;
case CTRL('d'):
if (inp.cu >= inp.len) return;
- memmove(p, p + 1, tail - 1); /* Shift tail left by 1 at p */
- inp.len--;
+ i = 1;
+ while (inp.cu + i < inp.len && IS_CONT(inp.buf[inp.cu + i]))
+ i++;
+ memmove(p, p + i, tail - i);
+ inp.len -= i;
break;
case CTRL('h'):
case KEY_BACKSPACE:
if (inp.cu == 0) return;
- memmove(p - 1, p, tail); /* Shift tail left by 1 at p-1 */
- inp.cu--, inp.len--;
+ old = inp.cu;
+ do { inp.cu--; } while (inp.cu > 0 && IS_CONT(inp.buf[inp.cu]));
+ memmove(&inp.buf[inp.cu], &inp.buf[old], inp.len - old);
+ inp.len -= (old - inp.cu);
break;
case CTRL('w'):
if (inp.cu == 0) break;
- i = 1;
- /* Find the start of the word (skipping trailing spaces) */
- while (i < inp.cu && inp.buf[inp.cu - i] == ' ') i++;
- while (i < inp.cu && inp.buf[inp.cu - (i + 1)] != ' ') i++;
- memmove(p - i, p, tail); /* Shift tail left by 'i' positions */
- inp.cu -= i, inp.len -= i;
+ old = inp.cu;
+ while (inp.cu > 0 && inp.buf[inp.cu - 1] == ' ') inp.cu--;
+ while (inp.cu > 0 && inp.buf[inp.cu - 1] != ' ') {
+ inp.cu--;
+ while (inp.cu > 0 && IS_CONT(inp.buf[inp.cu])) inp.cu--;
+ }
+ memmove(&inp.buf[inp.cu], &inp.buf[old], inp.len - old);
+ inp.len -= (old - inp.cu);
break;
case '\n':
inp.buf[inp.len] = 0;
uparse(inp.buf);
- inp.cu = inp.len = 0;
+ inp.cu = inp.len = inp.shft = 0;
break;
default:
- if (c > CHAR_MAX || inp.len >= BufSz - 1)
- return;
- memmove(p + 1, p, tail);
- inp.buf[inp.cu++] = c;
- inp.len++;
+ if (res == KEY_CODE_YES || (iswcntrl(wc))) return;
+ char mb[MB_LEN_MAX];
+ int n = wctomb(mb, wc);
+ if (n <= 0 || inp.len + n >= BufSz - 1) return;
+ memmove(p + n, p, tail);
+ memcpy(p, mb, n);
+ inp.cu += n; inp.len += n;
break;
}
tdrawinput();
diff --git a/config.mk b/config.mk
@@ -12,7 +12,7 @@ NCURSESINC = $(shell pkg-config --cflags-only-I ncursesw)
NCURSESLIB = $(shell pkg-config --libs ncursesw)
# OpenBSD (uncomment)
#NCURSESINC =
-#NCURSESLIB = -lncurses
+#NCURSESLIB = -lncursesw
# includes and libs
INCS = ${NCURSESINC}