Notcurses 3.0.16
a blingful library for TUIs and character graphics
Loading...
Searching...
No Matches
egcpool.h
Go to the documentation of this file.
1#ifndef NOTCURSES_EGCPOOL
2#define NOTCURSES_EGCPOOL
3
4#include <wchar.h>
5#include <errno.h>
6#include <stdio.h>
7#include <wctype.h>
8#include <stddef.h>
9#include <assert.h>
10#include <stdlib.h>
11#include <string.h>
12#include <stdbool.h>
13#include <unigbrk.h>
14#include <unictype.h>
15#include "notcurses/notcurses.h"
16#include "compat/compat.h"
17#include "logging.h"
18
19#ifdef __cplusplus
20extern "C" {
21#endif
22
23// an nccell only provides storage for up to 4 bytes of an EGC. if there's
24// anything more than that, it's spilled into the egcpool, and the nccell
25// records the offset. when an nccell is released, the egcpool memory it
26// owned is zeroed out, and made usable by another nccell.
27
// Backing store for EGCs that do not fit inside an nccell. A cell whose EGC
// has been spilled records an offset into 'pool'; the remaining fields are
// the bookkeeping needed to find free space and decide when to grow.
typedef struct egcpool {
  char* pool;    // ringbuffer of attached extension storage
  int poolsize;  // total number of bytes in pool
  int poolused;  // bytes actively used, grow when this gets too large
  int poolwrite; // next place to *look for* a place to write
} egcpool;
34
// Floor applied to the pool size whenever the pool is grown.
#define POOL_MINIMUM_ALLOC BUFSIZ
// Ceiling on the pool size: 16MiB (assumes int is at least 32 bits wide).
#define POOL_MAXIMUM_BYTES (16 * 1024 * 1024)
37
38static inline void
39egcpool_init(egcpool* p){
40 p->pool = NULL;
41 p->poolsize = 0;
42 p->poolwrite = 0;
43 p->poolused = 0;
44}
45
46static inline int
47egcpool_grow(egcpool* pool, int len){
48 int newsize = pool->poolsize * 2;
49 if(newsize < pool->poolsize){
50 return -1; // pernicious overflow (see also POOL_MAXIMUM_BYTES check below)
51 }
52 if(newsize < POOL_MINIMUM_ALLOC){
53 newsize = POOL_MINIMUM_ALLOC;
54 }
55 while(len > newsize - pool->poolsize){ // ensure we make enough space
56 if(newsize * 2 < newsize){
57 return -1;
58 }
59 newsize *= 2;
60 }
61 if(newsize > POOL_MAXIMUM_BYTES){
62 return -1;
63 }
64 // nasty cast here because c++ source might include this header :/
65 char* tmp = (char*)realloc(pool->pool, newsize);
66 if(tmp == NULL){
67 return -1;
68 }
69 pool->pool = tmp;
70 memset(pool->pool + pool->poolsize, 0, newsize - pool->poolsize);
71 pool->poolsize = newsize;
72 return 0;
73}
74
// Report how many bytes a UTF-8 encoded codepoint occupies, judging only by
// its first byte 'c'. Any byte that cannot legally start a sequence yields 1.
// Table 3.1B, Legal UTF8 Byte Sequences, Corrigendum #1: UTF-8 Shortest Form.
// Subsequent ("continuation") bytes must start with the bit pattern 10.
static inline size_t
utf8_codepoint_length(unsigned char c){
  if(c >= 0xc2 && c <= 0xdf){ // 0x000080...0x0007ff
    return 2;
  }
  if(c >= 0xe0 && c <= 0xef){ // 0x000800...0x00ffff
    return 3;
  }
  if(c >= 0xf0 && c <= 0xf4){ // 0x010000...0x10ffff
    return 4;
  }
  // everything else: ASCII (<= 0x7f), continuation bytes (0x80...0xbf),
  // overlong lead bytes (0xc0, 0xc1), and out-of-range lead bytes (>= 0xf5)
  return 1;
}
95
// Eat an EGC from the UTF-8 string input, counting bytes and columns. We use
// libunistring's uc_is_grapheme_break() to segment EGCs. Writes the number of
// columns to '*colcount'. Returns the number of bytes consumed, not including
// any NUL terminator, or -1 if the input is not valid for the current
// multibyte encoding or contains a prohibited control character. Neither the
// number of bytes nor columns is necessarily equal to the number of decoded
// code points. Such are the ways of Unicode.
// uc_is_grapheme_break() wants UTF-32, which is fine, because we need wchar_t
// to use wcwidth() anyway FIXME except this doesn't work with 16-bit wchar_t!
static inline int
utf8_egc_len(const char* gcluster, int* colcount){
  size_t ret = 0;
  *colcount = 0;
  int r;
  mbstate_t mbt;
  memset(&mbt, 0, sizeof(mbt));
  wchar_t wc, prevw = 0;
  bool injoin = false; // set once a ZWJ has been seen; stays set thereafter
  do{
    // mbrtowc() reports errors as (size_t)-1/-2, which land negative in 'r'
    r = mbrtowc(&wc, gcluster, MB_LEN_MAX, &mbt);
    if(r < 0){
      // FIXME probably ought escape this somehow
      logerror("invalid UTF8: %s", gcluster);
      return -1;
    }
    if(prevw && !injoin && uc_is_grapheme_break(prevw, wc)){
      break; // starts a new EGC, exit and do not claim
    }
    int cols;
    if(uc_is_property_variation_selector(wc)){ // ends EGC
      ret += r;
      break;
    }else if(wc == L'\u200d' || injoin){ // ZWJ is iswcntrl, so check it first
      injoin = true;
      cols = 0;
    }else{
      cols = wcwidth(wc);
      if(cols < 0){
        injoin = false;
        if(iswspace(wc)){ // newline or tab
          *colcount = 1;
          // claim every byte of the whitespace character: it need not be a
          // single byte (e.g. U+2028 encodes to three)
          return ret + r;
        }
        cols = 1;
        if(iswcntrl(wc)){
          logerror("prohibited or invalid unicode: 0x%08x", (unsigned)wc);
          return -1;
        }
      }
    }
    // the cluster's width is that of its first codepoint with nonzero width
    if(*colcount == 0){
      *colcount += cols;
    }
    ret += r;
    gcluster += r;
    // only the first codepoint is remembered for the grapheme break test
    if(!prevw){
      prevw = wc;
    }
  }while(r); // r == 0 means the NUL terminator was decoded
  // FIXME what if injoin is set? incomplete EGC!
  return ret;
}
156
157// stash away the provided UTF8, NUL-terminated grapheme cluster. the cluster
158// should not be less than 2 bytes (such a cluster should be directly stored in
159// the cell). returns -1 on error, and otherwise a non-negative offset. 'ulen'
160// must be the number of bytes to lift from egc (utf8_egc_len()).
161__attribute__ ((nonnull (1, 2)))
162int egcpool_stash(egcpool* pool, const char* egc, size_t ulen);
163
164// remove the egc from the pool. start at offset, and zero out everything until
165// we find a zero (our own NUL terminator). remove that number of bytes from
166// the usedcount.
167static inline void
168egcpool_release(egcpool* pool, int offset){
169 size_t freed = 1; // account for free(d) NUL terminator
170 while(pool->pool[offset]){
171 pool->pool[offset] = '\0';
172 ++freed;
173 ++offset;
174 assert(offset < pool->poolsize);
175 }
176 pool->poolused -= freed;
177 // FIXME ought we update pool->poolwrite?
178}
179
180static inline void
181egcpool_dump(egcpool* pool){
182 free(pool->pool);
183 egcpool_init(pool);
184}
185
186// get the offset into the egcpool for this cell's EGC. returns meaningless and
187// unsafe results if called on a simple cell.
188static inline uint32_t
189cell_egc_idx(const nccell* c){
190 return (htole(c->gcluster) & 0x00fffffflu);
191}
192
193// Is the cell a spilled (more than 4 byte) UTF8 EGC?
194static inline bool
195cell_extended_p(const nccell* c){
196 return (htole(c->gcluster) & 0xff000000ul) == 0x01000000ul;
197}
198
199// Is the cell simple (a UTF8-encoded EGC of four bytes or fewer)?
200static inline bool
201cell_simple_p(const nccell* c){
202 return !cell_extended_p(c);
203}
204
205// only applies to complex cells, do not use on simple cells
206__attribute__ ((__returns_nonnull__)) static inline const char*
207egcpool_extended_gcluster(const egcpool* pool, const nccell* c) {
208 assert(cell_extended_p(c));
209 uint32_t idx = cell_egc_idx(c);
210 return pool->pool + idx;
211}
212
213// Duplicate the contents of EGCpool 'src' onto another, wiping out any prior
214// contents in 'dst'.
215static inline int
216egcpool_dup(egcpool* dst, const egcpool* src){
217 if(src->pool){
218 char* tmp;
219 if((tmp = (char*)realloc(dst->pool, src->poolsize)) == NULL){
220 return -1;
221 }
222 dst->pool = tmp;
223 memcpy(dst->pool, src->pool, src->poolsize);
224 }
225 dst->poolsize = src->poolsize;
226 dst->poolused = src->poolused;
227 dst->poolwrite = src->poolwrite;
228 return 0;
229}
230
231#ifdef __cplusplus
232}
233#endif
234
235#endif
#define POOL_MAXIMUM_BYTES
Definition egcpool.h:36
const char * egc
Definition egcpool.h:162
const char size_t ulen
Definition egcpool.h:162
const nccell * c
Definition egcpool.h:207
uint32_t idx
Definition egcpool.h:209
__attribute__((nonnull(1, 2))) int egcpool_stash(egcpool *pool
#define POOL_MINIMUM_ALLOC
Definition egcpool.h:35
int r
Definition fbuf.h:226
assert(r >=0)
#define logerror(fmt,...)
Definition logging.h:32
#define htole(x)
Definition ncport.h:36
API int API int const nccell unsigned len
Definition notcurses.h:2592
char * pool
Definition egcpool.h:29
int poolsize
Definition egcpool.h:30
int poolused
Definition egcpool.h:31
int poolwrite
Definition egcpool.h:32
uint32_t gcluster
Definition notcurses.h:693
return NULL
Definition termdesc.h:229