icons-hash.c (8800B)
1 /* 2 * simple program which outputs a hash-table of `icons_ext` with low collusion. 3 * the hash function is case-insensitive, it also doesn't hash beyond the 4 * length of the longest extension. 5 */ 6 7 #include <stddef.h> 8 #include <stdint.h> 9 #include <inttypes.h> 10 11 #define GOLDEN_RATIO_32 UINT32_C(2654442313) /* golden ratio for 32bits: (2^32) / 1.61803 */ 12 #define GOLDEN_RATIO_64 UINT64_C(0x9E3793492EEDC3F7) 13 #define ICONS_TABLE_SIZE 8 /* size in bits. 8 = 256 */ 14 15 #ifndef TOUPPER 16 #define TOUPPER(ch) (((ch) >= 'a' && (ch) <= 'z') ? ((ch) - 'a' + 'A') : (ch)) 17 #endif 18 19 /* all of this is just for the static hash-table generation. only the hash 20 * function gets included in `nnn` binary. 21 */ 22 #ifdef ICONS_GENERATE 23 24 #include <stdio.h> 25 #include <string.h> 26 #include <stdlib.h> 27 #include <limits.h> 28 #include "icons.h" 29 30 /* like assert, but always sticks around during generation. */ 31 #define ENSURE(X) do { \ 32 if (!(X)) { \ 33 fprintf(stderr, "%s:%d: `%s`\n", __FILE__, __LINE__, #X); \ 34 abort(); \ 35 } \ 36 } while (0) 37 #define ARRLEN(X) (sizeof(X) / sizeof((X)[0])) 38 #define MAX(A, B) ((A) > (B) ? (A) : (B)) 39 #define HGEN_ITERARATION (1ul << 13) 40 #define ICONS_PROBE_MAX_ALLOWED 6 41 #define ICONS_MATCH_MAX (512) 42 43 #if 0 /* for logging some interesting info to stderr */ 44 #define log(...) fprintf(stderr, "[INFO]: " __VA_ARGS__) 45 #else 46 #define log(...) ((void)0) 47 #endif 48 49 static uint32_t icon_ext_hash(const char *s); 50 51 /* change ICONS_TABLE_SIZE to increase the size of the table */ 52 static struct icon_pair table[1u << ICONS_TABLE_SIZE]; 53 static uint8_t seen[ARRLEN(table)]; 54 /* arbitrarily picked starting position. change if needed. 55 * but ensure they're above 1 and prefer prime numbers. 56 */ 57 static uint32_t hash_start = 7; 58 static uint32_t hash_mul = 251; /* unused as of now */ 59 60 /* 61 * use robin-hood insertion to reduce the max probe length 62 */ 63 static void 64 rh_insert(const struct icon_pair item, uint32_t idx, uint32_t n) 65 { 66 ENSURE(n != 0); 67 for (uint32_t tries = 0; tries < ARRLEN(table); ++tries, ++n) { 68 if (seen[idx] < n) { 69 struct icon_pair tmp_item = table[idx]; 70 uint32_t tmp_n = seen[idx]; 71 72 ENSURE(n < (uint8_t)-1); 73 table[idx] = item; 74 seen[idx] = n; 75 76 if (tmp_n != 0) /* the slot we inserted to wasn't empty */ 77 rh_insert(tmp_item, idx, tmp_n); 78 return; 79 } 80 idx = (idx + 1) % ARRLEN(table); 81 } 82 ENSURE(0 && "unreachable"); 83 } 84 85 enum { PROBE_MAX, PROBE_TOTAL, PROBE_CNT }; 86 static unsigned int * 87 table_populate(unsigned int p[static PROBE_CNT]) 88 { 89 memset(seen, 0x0, sizeof seen); 90 memset(table, 0x0, sizeof table); 91 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) { 92 if (icons_ext[i].icon[0] == '\0') /* skip empty entries */ 93 continue; 94 uint32_t h = icon_ext_hash(icons_ext[i].match); 95 rh_insert(icons_ext[i], h, 1); 96 } 97 98 p[PROBE_MAX] = p[PROBE_TOTAL] = 0; 99 for (size_t i = 0; i < ARRLEN(seen); ++i) { 100 p[PROBE_MAX] = MAX(p[PROBE_MAX], seen[i]); 101 p[PROBE_TOTAL] += seen[i]; 102 } 103 return p; 104 } 105 106 /* permuted congruential generator */ 107 static uint32_t 108 pcg(uint64_t *state) 109 { 110 uint64_t oldstate = *state; 111 *state *= GOLDEN_RATIO_64; 112 uint32_t r = (oldstate >> 59); 113 uint32_t v = (oldstate ^ (oldstate >> 18)) >> 27; 114 return (v >> (-r & 31)) | (v << r); 115 } 116 117 int 118 main(void) 119 { 120 ENSURE(ARRLEN(icons_ext) <= ARRLEN(table)); 121 ENSURE(ICONS_TABLE_SIZE < 16); 122 ENSURE(1u << ICONS_TABLE_SIZE == ARRLEN(table)); 123 ENSURE((GOLDEN_RATIO_32 & 1) == 1); /* must be odd */ 124 ENSURE((GOLDEN_RATIO_64 & 1) == 1); /* must be odd */ 125 ENSURE(hash_start > 1); 126 ENSURE(hash_mul > 1); 127 /* ensure power of 2 hashtable size which allows compiler to optimize 128 * away mod (`%`) operations 129 */ 130 ENSURE((ARRLEN(table) & (ARRLEN(table) - 1)) == 0); 131 132 unsigned int max_probe = (unsigned)-1; 133 uint32_t best_hash_start = 0, best_hash_mul = 0, best_total_probe = 9999; 134 uint64_t hash_start_rng = hash_start, hash_mul_rng = hash_mul; 135 136 for (size_t i = 0; i < HGEN_ITERARATION; ++i) { 137 unsigned *p = table_populate((unsigned [PROBE_CNT]){0}); 138 if (p[PROBE_MAX] < max_probe || 139 (p[PROBE_MAX] == max_probe && p[PROBE_TOTAL] < best_total_probe)) 140 { 141 max_probe = p[PROBE_MAX]; 142 best_total_probe = p[PROBE_TOTAL]; 143 best_hash_start = hash_start; 144 best_hash_mul = hash_mul; 145 } 146 hash_start = pcg(&hash_start_rng); 147 hash_mul = pcg(&hash_mul_rng); 148 } 149 ENSURE(max_probe < ICONS_PROBE_MAX_ALLOWED); 150 hash_start = best_hash_start; 151 hash_mul = best_hash_mul; 152 { 153 unsigned *p = table_populate((unsigned [PROBE_CNT]){0}); 154 ENSURE(p[PROBE_MAX] == max_probe); 155 ENSURE(p[PROBE_TOTAL] == best_total_probe); 156 } 157 158 /* sanity check */ 159 double nitems = 0; 160 unsigned int total_probe = 0; 161 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) { 162 if (icons_ext[i].icon[0] == 0) 163 continue; 164 uint32_t found = 0, h = icon_ext_hash(icons_ext[i].match); 165 for (uint32_t k = 0; k < max_probe; ++k) { 166 uint32_t z = (h + k) % ARRLEN(table); 167 ++total_probe; 168 if (table[z].match && strcasecmp(icons_ext[i].match, table[z].match) == 0) { 169 found = 1; 170 break; 171 } 172 } 173 ENSURE(found); 174 ++nitems; 175 } 176 ENSURE(total_probe == best_total_probe); 177 178 size_t match_max = 0, icon_max = 0; 179 for (size_t i = 0; i < ARRLEN(icons_name); ++i) { 180 match_max = MAX(match_max, strlen(icons_name[i].match) + 1); 181 icon_max = MAX(icon_max, strlen(icons_name[i].icon) + 1); 182 } 183 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) { 184 match_max = MAX(match_max, strlen(icons_ext[i].match) + 1); 185 icon_max = MAX(icon_max, strlen(icons_ext[i].icon) + 1); 186 } 187 icon_max = MAX(icon_max, strlen(dir_icon.icon) + 1); 188 icon_max = MAX(icon_max, strlen(exec_icon.icon) + 1); 189 icon_max = MAX(icon_max, strlen(file_icon.icon) + 1); 190 ENSURE(icon_max < ICONS_MATCH_MAX); 191 192 const char *uniq[ARRLEN(icons_ext)] = {0}; 193 size_t uniq_head = 0; 194 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) { 195 if (icons_ext[i].icon[0] == 0) 196 continue; 197 int isuniq = 1; 198 for (size_t k = 0; k < uniq_head; ++k) { 199 if (strcasecmp(uniq[k], icons_ext[i].icon) == 0) { 200 isuniq = 0; 201 break; 202 } 203 } 204 if (isuniq) { 205 ENSURE(uniq_head < ARRLEN(uniq)); 206 uniq[uniq_head++] = icons_ext[i].icon; 207 } 208 } 209 ENSURE(uniq_head < (unsigned char)-1); 210 211 log("load-factor: %.2f (%u/%zu)\n", (nitems * 100.0) / (double)ARRLEN(table), 212 (unsigned int)nitems, ARRLEN(table)); 213 log("max_probe : %6u\n", max_probe); 214 log("total_probe: %6u\n", total_probe); 215 log("uniq icons : %6zu\n", uniq_head); 216 log("no-compact : %6zu bytes\n", ARRLEN(table) * icon_max); 217 log("compaction : %6zu bytes\n", uniq_head * icon_max + ARRLEN(table)); 218 log("hash_start : %6" PRIu32 "\n", hash_start); 219 log("hash_mul : %6" PRIu32 "\n", hash_mul); 220 221 printf("#ifndef INCLUDE_ICONS_GENERATED\n"); 222 printf("#define INCLUDE_ICONS_GENERATED\n\n"); 223 224 printf("/*\n * NOTE: This file is automatically generated.\n"); 225 printf(" * DO NOT EDIT THIS FILE DIRECTLY.\n"); 226 printf(" * Use `icons.h` to customize icons\n */\n\n"); 227 228 printf("#define hash_start UINT32_C(%" PRIu32 ")\n", hash_start); 229 printf("#define hash_mul UINT32_C(%" PRIu32 ")\n\n", hash_mul); 230 printf("#define ICONS_PROBE_MAX %uu\n", max_probe); 231 printf("#define ICONS_MATCH_MAX %zuu\n\n", match_max); 232 printf("#define ICONS_STR_MAX %zuu\n\n", icon_max); 233 234 printf("struct icon_pair { const char match[ICONS_MATCH_MAX]; " 235 "const char icon[ICONS_STR_MAX]; unsigned char color; };\n\n"); 236 237 printf("static const char icons_ext_uniq[%zu][ICONS_STR_MAX] = {\n", uniq_head); 238 for (size_t i = 0; i < uniq_head; ++i) 239 printf("\t\"%s\",\n", uniq[i]); 240 printf("};\n\n"); 241 242 printf("static const struct {\n\tconst char match[ICONS_MATCH_MAX];\n" 243 "\tunsigned char idx;\n\tunsigned char color;\n} icons_ext[%zu] = {\n", 244 ARRLEN(table)); 245 for (size_t i = 0; i < ARRLEN(table); ++i) { 246 if (table[i].icon == NULL || table[i].icon[0] == '\0') /* skip empty entries */ 247 continue; 248 size_t k; 249 for (k = 0; k < uniq_head; ++k) { 250 if (strcasecmp(table[i].icon, uniq[k]) == 0) 251 break; 252 } 253 ENSURE(k < uniq_head); 254 printf("\t[%3zu] = {\"%s\", %zu, %hhu },\n", 255 i, table[i].match, k, table[i].color); 256 } 257 printf("};\n\n"); 258 259 printf("#endif /* INCLUDE_ICONS_GENERATED */\n"); 260 } 261 262 #else 263 #define ENSURE(X) ((void)0) 264 #endif /* ICONS_GENERATE */ 265 266 #if defined(ICONS_GENERATE) || defined(ICONS_ENABLED) 267 static uint32_t 268 icon_ext_hash(const char *str) 269 { 270 uint32_t i, hash = hash_start; 271 enum { wsz = sizeof hash * CHAR_BIT, z = wsz - ICONS_TABLE_SIZE, r = 5 }; 272 273 /* just an xor-rotate hash. in general, this is a horrible hash 274 * function but for our specific input it works fine while being 275 * computationally cheap. 276 */ 277 for (i = 0; i < ICONS_MATCH_MAX && str[i] != '\0'; ++i) { 278 hash ^= TOUPPER((unsigned char)str[i]); 279 hash = (hash >> (wsz - r)) | (hash << r); 280 } 281 282 /* finalizer: https://probablydance.com/2018/06/16 */ 283 hash ^= (hash >> z); 284 hash *= GOLDEN_RATIO_32; 285 286 hash >>= z; 287 ENSURE(hash < ARRLEN(table)); 288 289 return hash; 290 } 291 #endif