sistema_progs

Programas para customizar o meu entorno de traballo nos meus equipos persoais
Log | Files | Refs

icons-hash.c (8800B)


      1 /*
      2  * simple program which outputs a hash-table of `icons_ext` with low collusion.
      3  * the hash function is case-insensitive, it also doesn't hash beyond the
      4  * length of the longest extension.
      5  */
      6 
      7 #include <stddef.h>
      8 #include <stdint.h>
      9 #include <inttypes.h>
     10 
     11 #define GOLDEN_RATIO_32  UINT32_C(2654442313) /* golden ratio for 32bits: (2^32) / 1.61803 */
     12 #define GOLDEN_RATIO_64  UINT64_C(0x9E3793492EEDC3F7)
     13 #define ICONS_TABLE_SIZE 8 /* size in bits. 8 = 256 */
     14 
     15 #ifndef TOUPPER
     16 	#define TOUPPER(ch)     (((ch) >= 'a' && (ch) <= 'z') ? ((ch) - 'a' + 'A') : (ch))
     17 #endif
     18 
     19 /* all of this is just for the static hash-table generation. only the hash
     20  * function gets included in `nnn` binary.
     21  */
     22 #ifdef ICONS_GENERATE
     23 
     24 #include <stdio.h>
     25 #include <string.h>
     26 #include <stdlib.h>
     27 #include <limits.h>
     28 #include "icons.h"
     29 
     30 /* like assert, but always sticks around during generation. */
     31 #define ENSURE(X) do { \
     32 	if (!(X)) { \
     33 		fprintf(stderr, "%s:%d: `%s`\n", __FILE__, __LINE__, #X); \
     34 		abort(); \
     35 	} \
     36 } while (0)
     37 #define ARRLEN(X) (sizeof(X) / sizeof((X)[0]))
     38 #define MAX(A, B) ((A) > (B) ? (A) : (B))
     39 #define HGEN_ITERARATION (1ul << 13)
     40 #define ICONS_PROBE_MAX_ALLOWED 6
     41 #define ICONS_MATCH_MAX (512)
     42 
     43 #if 0 /* for logging some interesting info to stderr */
     44 	#define log(...)  fprintf(stderr, "[INFO]: " __VA_ARGS__)
     45 #else
     46 	#define log(...) ((void)0)
     47 #endif
     48 
     49 static uint32_t icon_ext_hash(const char *s);
     50 
     51 /* change ICONS_TABLE_SIZE to increase the size of the table */
     52 static struct icon_pair table[1u << ICONS_TABLE_SIZE];
     53 static uint8_t seen[ARRLEN(table)];
     54 /* arbitrarily picked starting position. change if needed.
     55  * but ensure they're above 1 and prefer prime numbers.
     56  */
     57 static uint32_t hash_start = 7;
     58 static uint32_t hash_mul = 251; /* unused as of now */
     59 
     60 /*
     61  * use robin-hood insertion to reduce the max probe length
     62  */
     63 static void
     64 rh_insert(const struct icon_pair item, uint32_t idx, uint32_t n)
     65 {
     66 	ENSURE(n != 0);
     67 	for (uint32_t tries = 0; tries < ARRLEN(table); ++tries, ++n) {
     68 		if (seen[idx] < n) {
     69 			struct icon_pair tmp_item = table[idx];
     70 			uint32_t tmp_n = seen[idx];
     71 
     72 			ENSURE(n < (uint8_t)-1);
     73 			table[idx] = item;
     74 			seen[idx] = n;
     75 
     76 			if (tmp_n != 0) /* the slot we inserted to wasn't empty */
     77 				rh_insert(tmp_item, idx, tmp_n);
     78 			return;
     79 		}
     80 		idx = (idx + 1) % ARRLEN(table);
     81 	}
     82 	ENSURE(0 && "unreachable");
     83 }
     84 
     85 enum { PROBE_MAX, PROBE_TOTAL, PROBE_CNT };
     86 static unsigned int *
     87 table_populate(unsigned int p[static PROBE_CNT])
     88 {
     89 	memset(seen, 0x0, sizeof seen);
     90 	memset(table, 0x0, sizeof table);
     91 	for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
     92 		if (icons_ext[i].icon[0] == '\0') /* skip empty entries */
     93 			continue;
     94 		uint32_t h = icon_ext_hash(icons_ext[i].match);
     95 		rh_insert(icons_ext[i], h, 1);
     96 	}
     97 
     98 	p[PROBE_MAX] = p[PROBE_TOTAL] = 0;
     99 	for (size_t i = 0; i < ARRLEN(seen); ++i) {
    100 		p[PROBE_MAX] = MAX(p[PROBE_MAX], seen[i]);
    101 		p[PROBE_TOTAL] += seen[i];
    102 	}
    103 	return p;
    104 }
    105 
    106 /* permuted congruential generator */
    107 static uint32_t
    108 pcg(uint64_t *state)
    109 {
    110 	uint64_t oldstate = *state;
    111 	*state *= GOLDEN_RATIO_64;
    112 	uint32_t r = (oldstate >> 59);
    113 	uint32_t v = (oldstate ^ (oldstate >> 18)) >> 27;
    114 	return (v >> (-r & 31)) | (v << r);
    115 }
    116 
    117 int
    118 main(void)
    119 {
    120 	ENSURE(ARRLEN(icons_ext) <= ARRLEN(table));
    121 	ENSURE(ICONS_TABLE_SIZE < 16);
    122 	ENSURE(1u << ICONS_TABLE_SIZE == ARRLEN(table));
    123 	ENSURE((GOLDEN_RATIO_32 & 1) == 1); /* must be odd */
    124 	ENSURE((GOLDEN_RATIO_64 & 1) == 1); /* must be odd */
    125 	ENSURE(hash_start > 1);
    126 	ENSURE(hash_mul > 1);
    127 	/* ensure power of 2 hashtable size which allows compiler to optimize
    128 	 * away mod (`%`) operations
    129 	 */
    130 	ENSURE((ARRLEN(table) & (ARRLEN(table) - 1)) == 0);
    131 
    132 	unsigned int max_probe = (unsigned)-1;
    133 	uint32_t best_hash_start = 0, best_hash_mul = 0, best_total_probe = 9999;
    134 	uint64_t hash_start_rng = hash_start, hash_mul_rng = hash_mul;
    135 
    136 	for (size_t i = 0; i < HGEN_ITERARATION; ++i) {
    137 		unsigned *p = table_populate((unsigned [PROBE_CNT]){0});
    138 		if (p[PROBE_MAX] < max_probe ||
    139 		    (p[PROBE_MAX] == max_probe && p[PROBE_TOTAL] < best_total_probe))
    140 		{
    141 			max_probe = p[PROBE_MAX];
    142 			best_total_probe = p[PROBE_TOTAL];
    143 			best_hash_start = hash_start;
    144 			best_hash_mul = hash_mul;
    145 		}
    146 		hash_start = pcg(&hash_start_rng);
    147 		hash_mul = pcg(&hash_mul_rng);
    148 	}
    149 	ENSURE(max_probe < ICONS_PROBE_MAX_ALLOWED);
    150 	hash_start = best_hash_start;
    151 	hash_mul = best_hash_mul;
    152 	{
    153 		unsigned *p = table_populate((unsigned [PROBE_CNT]){0});
    154 		ENSURE(p[PROBE_MAX] == max_probe);
    155 		ENSURE(p[PROBE_TOTAL] == best_total_probe);
    156 	}
    157 
    158 	/* sanity check */
    159 	double nitems = 0;
    160 	unsigned int total_probe = 0;
    161 	for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
    162 		if (icons_ext[i].icon[0] == 0)
    163 			continue;
    164 		uint32_t found = 0, h = icon_ext_hash(icons_ext[i].match);
    165 		for (uint32_t k = 0; k < max_probe; ++k) {
    166 			uint32_t z = (h + k) % ARRLEN(table);
    167 			++total_probe;
    168 			if (table[z].match && strcasecmp(icons_ext[i].match, table[z].match) == 0) {
    169 				found = 1;
    170 				break;
    171 			}
    172 		}
    173 		ENSURE(found);
    174 		++nitems;
    175 	}
    176 	ENSURE(total_probe == best_total_probe);
    177 
    178 	size_t match_max = 0, icon_max = 0;
    179 	for (size_t i = 0; i < ARRLEN(icons_name); ++i) {
    180 		match_max = MAX(match_max, strlen(icons_name[i].match) + 1);
    181 		icon_max = MAX(icon_max, strlen(icons_name[i].icon) + 1);
    182 	}
    183 	for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
    184 		match_max = MAX(match_max, strlen(icons_ext[i].match) + 1);
    185 		icon_max = MAX(icon_max, strlen(icons_ext[i].icon) + 1);
    186 	}
    187 	icon_max = MAX(icon_max, strlen(dir_icon.icon) + 1);
    188 	icon_max = MAX(icon_max, strlen(exec_icon.icon) + 1);
    189 	icon_max = MAX(icon_max, strlen(file_icon.icon) + 1);
    190 	ENSURE(icon_max < ICONS_MATCH_MAX);
    191 
    192 	const char *uniq[ARRLEN(icons_ext)] = {0};
    193 	size_t uniq_head = 0;
    194 	for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
    195 		if (icons_ext[i].icon[0] == 0)
    196 			continue;
    197 		int isuniq = 1;
    198 		for (size_t k = 0; k < uniq_head; ++k) {
    199 			if (strcasecmp(uniq[k], icons_ext[i].icon) == 0) {
    200 				isuniq = 0;
    201 				break;
    202 			}
    203 		}
    204 		if (isuniq) {
    205 			ENSURE(uniq_head < ARRLEN(uniq));
    206 			uniq[uniq_head++] = icons_ext[i].icon;
    207 		}
    208 	}
    209 	ENSURE(uniq_head < (unsigned char)-1);
    210 
    211 	log("load-factor:  %.2f (%u/%zu)\n", (nitems * 100.0) / (double)ARRLEN(table),
    212 	    (unsigned int)nitems, ARRLEN(table));
    213 	log("max_probe  : %6u\n", max_probe);
    214 	log("total_probe: %6u\n", total_probe);
    215 	log("uniq icons : %6zu\n", uniq_head);
    216 	log("no-compact : %6zu bytes\n", ARRLEN(table) * icon_max);
    217 	log("compaction : %6zu bytes\n", uniq_head * icon_max + ARRLEN(table));
    218 	log("hash_start : %6" PRIu32 "\n", hash_start);
    219 	log("hash_mul   : %6" PRIu32 "\n", hash_mul);
    220 
    221 	printf("#ifndef INCLUDE_ICONS_GENERATED\n");
    222 	printf("#define INCLUDE_ICONS_GENERATED\n\n");
    223 
    224 	printf("/*\n * NOTE: This file is automatically generated.\n");
    225 	printf(" * DO NOT EDIT THIS FILE DIRECTLY.\n");
    226 	printf(" * Use `icons.h` to customize icons\n */\n\n");
    227 
    228 	printf("#define hash_start  UINT32_C(%" PRIu32 ")\n", hash_start);
    229 	printf("#define hash_mul    UINT32_C(%" PRIu32 ")\n\n", hash_mul);
    230 	printf("#define ICONS_PROBE_MAX %uu\n", max_probe);
    231 	printf("#define ICONS_MATCH_MAX %zuu\n\n", match_max);
    232 	printf("#define ICONS_STR_MAX %zuu\n\n", icon_max);
    233 
    234 	printf("struct icon_pair { const char match[ICONS_MATCH_MAX]; "
    235 	       "const char icon[ICONS_STR_MAX]; unsigned char color; };\n\n");
    236 
    237 	printf("static const char icons_ext_uniq[%zu][ICONS_STR_MAX] = {\n", uniq_head);
    238 	for (size_t i = 0; i < uniq_head; ++i)
    239 		printf("\t\"%s\",\n", uniq[i]);
    240 	printf("};\n\n");
    241 
    242 	printf("static const struct {\n\tconst char match[ICONS_MATCH_MAX];\n"
    243 	       "\tunsigned char idx;\n\tunsigned char color;\n} icons_ext[%zu] = {\n",
    244 	        ARRLEN(table));
    245 	for (size_t i = 0; i < ARRLEN(table); ++i) {
    246 		if (table[i].icon == NULL || table[i].icon[0] == '\0') /* skip empty entries */
    247 			continue;
    248 		size_t k;
    249 		for (k = 0; k < uniq_head; ++k) {
    250 			if (strcasecmp(table[i].icon, uniq[k]) == 0)
    251 				break;
    252 		}
    253 		ENSURE(k < uniq_head);
    254 		printf("\t[%3zu] = {\"%s\", %zu, %hhu },\n",
    255 		       i, table[i].match, k, table[i].color);
    256 	}
    257 	printf("};\n\n");
    258 
    259 	printf("#endif /* INCLUDE_ICONS_GENERATED */\n");
    260 }
    261 
    262 #else
    263 	#define ENSURE(X) ((void)0)
    264 #endif /* ICONS_GENERATE */
    265 
    266 #if defined(ICONS_GENERATE) || defined(ICONS_ENABLED)
    267 static uint32_t
    268 icon_ext_hash(const char *str)
    269 {
    270 	uint32_t i, hash = hash_start;
    271 	enum { wsz = sizeof hash * CHAR_BIT, z = wsz - ICONS_TABLE_SIZE, r = 5 };
    272 
    273 	/* just an xor-rotate hash. in general, this is a horrible hash
    274 	 * function but for our specific input it works fine while being
    275 	 * computationally cheap.
    276 	 */
    277 	for (i = 0; i < ICONS_MATCH_MAX && str[i] != '\0'; ++i) {
    278 		hash ^= TOUPPER((unsigned char)str[i]);
    279 		hash  = (hash >> (wsz - r)) | (hash << r);
    280 	}
    281 
    282 	/* finalizer: https://probablydance.com/2018/06/16 */
    283 	hash ^= (hash >> z);
    284 	hash *= GOLDEN_RATIO_32;
    285 
    286 	hash >>= z;
    287 	ENSURE(hash < ARRLEN(table));
    288 
    289 	return hash;
    290 }
    291 #endif