canvas.emoji.sh (17516B)
#!/bin/bash
#
# Maintenance tool for emoji-width data.  The usage message says this file is
# meant to be sourced ("source canvas.emoji.sh SUBCOMMAND ARGS..."), and the
# functions below call helpers named ble/* and bleopt that are not defined in
# this file, so they must already be available in the sourcing shell.
#
# Do not enable "set -e"/"set -u" here: the file is sourced, so shell options
# would leak into the caller's shell.

## @fn mkd dir
##   Create the directory (including parents) unless it already exists.
function mkd { [[ -d $1 ]] || mkdir -p "$1"; }

## @fn make/canvas.emoji/get-emoji-data [emoji_version]
##   Download (once) emoji-test.txt of the specified version into out/data and
##   load the code-point column of every non-comment line.
##   @param[in,opt] emoji_version
##     Version directory under https://unicode.org/Public/emoji/ (default 14.0)
##   @var[out] emoji_data
##     Array whose elements are the text before the first ";" of each data
##     line, i.e. space-separated hexadecimal code points.
##   @var[out] emoji_cache_file
##     Path of the cached emoji-test.txt
##   @exit 1 when the cache directory cannot be created or the download fails
##     (emoji_data is set to an empty array in that case).
function make/canvas.emoji/get-emoji-data {
  #local unicode_version=$(wget https://unicode.org/Public/emoji/ -O - | grep -Eo 'href="[0-9]+\.[0-9]+/"' | sed 's,^href=",,;s,/"$,,' | tail -n 1)
  local unicode_version=${1:-14.0}
  emoji_cache_file=out/data/unicode-emoji-$unicode_version.txt
  if [[ ! -s $emoji_cache_file ]]; then
    if ! mkd out/data; then
      emoji_data=()
      return 1
    fi
    # Download to a temporary name so that an interrupted download never
    # leaves a truncated cache file behind.
    if ! wget "https://unicode.org/Public/emoji/$unicode_version/emoji-test.txt" -O "$emoji_cache_file.part"; then
      rm -f -- "$emoji_cache_file.part"
      emoji_data=()
      echo "make/canvas.emoji: failed to download emoji-test.txt ($unicode_version)" >&2
      return 1
    fi
    mv "$emoji_cache_file.part" "$emoji_cache_file" || return 1
  fi

  # Skip comment lines; for the other lines containing ";", print the part
  # before it.
  local gawk_script='
    /^[[:space:]]*#/ { next; }
    sub(/;.*$/, "") { print $0; }'
  ble/util/assign-array emoji_data 'gawk "$gawk_script" "$emoji_cache_file"'
}

## @fn make/canvas.emoji/sub:help
##   Print the usage and the list of the subcommands, i.e. the functions whose
##   names start with "make/canvas.emoji/sub:".
function make/canvas.emoji/sub:help {
  # The basename of this script; fall back to the default name when
  # BASH_SOURCE is unavailable.
  local script_name=${BASH_SOURCE[0]##*/}
  ble/util/print "usage: source ${script_name:-canvas.emoji.sh} SUBCOMMAND ARGS..."
  ble/util/print
  ble/util/print "SUBCOMMAND"
  declare -F | sed -n 's/^declare -f make\/canvas.emoji\/sub:\([^[:space:]]*\)/  \1/p'
  ble/util/print
}

## @fn make/canvas.emoji/sub:save-emoji-type
##   Write "\UXXXXX...: TYPE" for each sequence of emoji-test.txt into
##   out/data/emoji.TYPE.txt, where TYPE is UQ (unqualified), FQ
##   (fully-qualified), MQ (minimally-qualified), or XX (anything else).
function make/canvas.emoji/sub:save-emoji-type {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data || return 1
  gawk '
    /^[[:space:]]*#/ { next; }
    {
      if (/unqualified/) {
        type = "UQ";
      } else if (/fully-qualified/) {
        type = "FQ";
      } else if (/minimally-qualified/) {
        type = "MQ";
      } else {
        type = "XX";
      }
    }
    sub(/;.*$/, "") {
      s = "";
      for (i = 1; i <= NF; i++) {
        s = s sprintf("\\U%05X", strtonum("0x" $i));
      }
      print s ": " type;
    }
  ' "$emoji_cache_file" | sort -u > out/data/emoji.TYPE.txt
}

## @fn make/canvas.emoji/sub:compare
##   Join out/data/emoji.TYPE.txt with the measured widths in
##   out/data/emoji.blesh.txt and out/data/emoji.kitty.txt on the "\U..." key.
##   The "w=" prefix of the width column is rewritten into "blesh=" and
##   "kitty=" so that the columns can be told apart.
function make/canvas.emoji/sub:compare {
  grep '^\\' out/data/emoji.TYPE.txt |
    join - <(awk '/^\\/ { sub(/^w=/, "blesh=", $2); print; }' out/data/emoji.blesh.txt) |
    join - <(awk '/^\\/ { sub(/^w=/, "kitty=", $2); print; }' out/data/emoji.kitty.txt)
  # grep -E '^.{7}: UQ' | less
}

#------------------------------------------------------------------------------
# measure-emoji.impl1

# Per-request records of impl1; the i-th elements belong to the same request.
_tool_emoji_width_code=()
_tool_emoji_width_gcb=()
_tool_emoji_width_w=()

## @fn inspect1/proc hex_code...
##   Output CR followed by the characters of the specified hexadecimal code
##   points and register inspect1/callback through ble/term/CPR/request.draw
##   (presumably a cursor-position request -- confirm in ble.sh).
function inspect1/proc {
  local -a DRAW_BUFF=() code=() gcb=()
  local ret c
  ble/canvas/put.draw $'\r'
  for c; do
    ((c=16#$c))
    ble/array#push code "$c"
    ble/unicode/GraphemeCluster/c2break "$c"
    ble/array#push gcb "$ret"
    ble/util/c2s "$c"
    ble/canvas/put.draw "$ret"
  done

  ble/array#push _tool_emoji_width_code "${code[*]}"
  ble/array#push _tool_emoji_width_gcb "${gcb[*]}"
  ble/term/CPR/request.draw inspect1/callback
  ble/canvas/bflush.draw
}

## @fn inspect1/callback line column
##   Record "column - 1" as the measured width.  The arguments are presumably
##   the reported cursor position.
function inspect1/callback {
  local term_l=$1 term_c=$2
  local w=$((term_c-1))
  ble/array#push _tool_emoji_width_w "$w"
}

## @fn inspect1/callback-final
##   Append the summary of the impl1 measurement to emoji.txt (the whole
##   function body is redirected).
function inspect1/callback-final {
  echo ----------------------------------------
  date +'%F %T %Z'
  echo "request count: ${#_tool_emoji_width_code[@]}"
  echo "response count: ${#_tool_emoji_width_w[@]}"
  echo "remaining CPR hooks: ${#_ble_term_CPR_hook[@]}"
  local i
  for ((i=0;i<${#_tool_emoji_width_w[@]};i++)); do
    echo "${_tool_emoji_width_gcb[i]}: w=${_tool_emoji_width_w[i]}"
  done | sort -u
} >> emoji.txt

## @fn make/canvas.emoji/sub:measure-emoji.impl1
##   Issue the requests for all the emoji sequences at once and finally
##   register inspect1/callback-final.
function make/canvas.emoji/sub:measure-emoji.impl1 {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data || return 1
  ble/util/buffer.flush >&2
  local line words
  for line in "${emoji_data[@]}"; do
    # Split the line into words instead of eval'ing it: the data comes from a
    # downloaded file and must never be executed as shell code.
    ble/string#split-words words "$line"
    inspect1/proc "${words[@]}"
  done
  ble/term/CPR/request.buff inspect1/callback-final
  ble/util/buffer.flush >&2
}

#------------------------------------------------------------------------------
# measure-emoji

# State of the batched measurement (inspect2/*).
_term_emojiw_index_req=0 # number of the requests issued so far
_term_emojiw_index_rcv=0 # number of the responses received so far
_term_emojiw_data=()     # lines to measure (copy of emoji_data)
_term_emojiw_code=()     # "\UXXXXX..." representation of each request
_term_emojiw_gcb=()      # values returned by c2break for each request
_term_emojiw_width=()    # measured width of each request
_term_emojiw_output=emoji.txt

## @fn inspect2/start
##   Reset the state, truncate the output file, and start the first batch.
##   @var[in] emoji_data
function inspect2/start {
  _term_emojiw_index_req=0
  _term_emojiw_index_rcv=0
  _term_emojiw_data=("${emoji_data[@]}")
  # Also clear the per-request records; otherwise a second run in the same
  # shell would report the entries of the previous run.
  _term_emojiw_code=()
  _term_emojiw_gcb=()
  _term_emojiw_width=()
  _term_emojiw_output=emoji.txt
  : > "$_term_emojiw_output"
  inspect2/next
}

## @fn inspect2/next
##   Issue the next batch of at most 10 requests, or call inspect2/final when
##   every line has been answered.
function inspect2/next {
  if ((_term_emojiw_index_rcv>=${#_term_emojiw_data[@]})); then
    inspect2/final
    return
  fi

  local ndata=${#_term_emojiw_data[@]}

  local i ret
  for ((i=0;i<10&&_term_emojiw_index_req<ndata;i++,_term_emojiw_index_req++)); do
    local words
    ble/string#split-words words "${_term_emojiw_data[_term_emojiw_index_req]}"

    local -a gcb=() code=()
    local c s=
    for c in "${words[@]}"; do
      ((c=16#$c))
      ble/array#push code "$c"
      ble/unicode/GraphemeCluster/c2break "$c"
      ble/array#push gcb "$ret"
      ble/util/c2s "$c"
      s=$s$ret
    done
    ble/util/sprintf ret '\\U%05X' "${code[@]}"
    ble/array#push _term_emojiw_code "$ret"
    ble/array#push _term_emojiw_gcb "${gcb[*]}"

    ble/util/buffer $'\r'"$s"
    ble/term/CPR/request.buff inspect2/wait
    ble/util/buffer.flush >&2
  done
  ble/edit/info/show text "Measuring #$_term_emojiw_index_rcv..$_term_emojiw_index_req"
}

## @fn inspect2/wait line column
##   Callback for each request.  Store "column - 1" as the width and start the
##   next batch once all the issued requests have been answered.
function inspect2/wait {
  local col=$2
  ((_term_emojiw_width[_term_emojiw_index_rcv]=col-1))
  ((++_term_emojiw_index_rcv==_term_emojiw_index_req)) &&
    inspect2/next
  return 0
}

## @fn inspect2/final
##   Append the results (first keyed by the c2break values, then by the code
##   points) to the output file and print "Done".
function inspect2/final {
  {
    echo ----------------------------------------
    date +'%F %T %Z'
    echo "request count: ${#_term_emojiw_gcb[@]}"
    echo "response count: ${#_term_emojiw_width[@]}"
    local i
    for ((i=0;i<_term_emojiw_index_rcv;i++)); do
      echo "${_term_emojiw_gcb[i]}: w=${_term_emojiw_width[i]}"
    done | sort -u
    for ((i=0;i<_term_emojiw_index_rcv;i++)); do
      echo "${_term_emojiw_code[i]}: w=${_term_emojiw_width[i]}"
    done | sort -u
  } >> "$_term_emojiw_output"
  echo Done
}

## @fn make/canvas.emoji/sub:measure-emoji
##   Measure the widths of the emoji sequences (version 14.0) in batches and
##   save them into emoji.txt.
function make/canvas.emoji/sub:measure-emoji {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data 14.0 || return 1
  inspect2/start
}

#------------------------------------------------------------------------------
# measure-emoji-sequences

## @fn ble/unicode/measure-emoji-sequences
##   Print "GCB...: w=WIDTH" lines followed by "\UXXXXX...: w=WIDTH" lines
##   (each group passed through "sort -u"), where WIDTH is the value returned
##   by ble/util/s2w for the sequence.
##   @var[in] emoji_data
function ble/unicode/measure-emoji-sequences {
  local line words ret
  local -a codes=() gcbs=() widths=()
  for line in "${emoji_data[@]}"; do
    ble/string#split-words words "$line"

    local s= word c
    local -a code=() gcb=()
    for word in "${words[@]}"; do
      ((c=16#$word))
      ble/array#push code "$c"
      ble/unicode/GraphemeCluster/c2break "$c"; ble/array#push gcb "$ret"
      ble/util/c2s "$c"; s=$s$ret
    done
    ble/util/sprintf ret '\\U%05X' "${code[@]}"
    ble/array#push codes "$ret"
    ble/array#push gcbs "${gcb[*]}"
    ble/util/s2w "$s"
    ble/array#push widths "$ret"
  done

  local i n=${#codes[@]}
  for ((i=0;i<n;i++)); do
    echo "${gcbs[i]}: w=${widths[i]}"
  done | sort -u
  for ((i=0;i<n;i++)); do
    echo "${codes[i]}: w=${widths[i]}"
  done | sort -u
}

## @fn ble/unicode/test-emoji-sequence-width term scheme
##   Save the widths under the current settings into
##   out/data/emoji.blesh-SCHEME.txt and show the diff of its "\U" lines
##   against those of out/data/emoji.TERM.txt.
function ble/unicode/test-emoji-sequence-width {
  local term=$1 scheme=$2
  ble/unicode/measure-emoji-sequences > out/data/emoji."blesh-$scheme".txt
  diff -bwu <(grep '^\\U' out/data/emoji."blesh-$scheme".txt) <(grep '^\\U' out/data/emoji."$term".txt)
}

## @fn make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
##   Assign the Pictographic value to U+1F3FB..U+1F3FF (the skin-tone
##   modifiers) in _ble_unicode_GraphemeClusterBreak.
function make/canvas.emoji/measure-blesh/.modifiers-as-pictographic {
  local code
  for ((code=0x1F3FB;code<=0x1F3FF;code++)); do
    _ble_unicode_GraphemeClusterBreak[code]=$_ble_unicode_GraphemeClusterBreak_Pictographic
  done
}

## @fn make/canvas.emoji/measure-blesh/.tags-as-other
##   Assign the Other value to U+E0020..U+E007F (Unicode tags) in
##   _ble_unicode_GraphemeClusterBreak.
function make/canvas.emoji/measure-blesh/.tags-as-other {
  local code
  for ((code=0xE0020;code<=0xE007F;code++)); do
    _ble_unicode_GraphemeClusterBreak[code]=$_ble_unicode_GraphemeClusterBreak_Other
  done
}

## @fn make/canvas.emoji/sub:measure-blesh term [scheme]
##   Apply the settings that emulate the terminal SCHEME (default: TERM) and
##   compare the resulting widths with out/data/emoji.TERM.txt.  Each scheme
##   runs in a subshell so that the modified settings do not leak.  The scheme
##   "blesh" only writes out/data/emoji.blesh.txt.
##   @exit 2 when the scheme is unknown.
function make/canvas.emoji/sub:measure-blesh {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data 14.0 || return 1

  local term=$1 scheme=${2:-$1}
  case $scheme in
  (blesh)
    (
      echo blesh...
      bleopt char_width_mode=east
      bleopt emoji_width=2
      bleopt emoji_opts=ri:tpvs:epvs:zwj
      ble/unicode/measure-emoji-sequences > out/data/emoji.blesh.txt
    ) ;;

  (kitty)
    (
      echo kitty...
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=13.1
      bleopt emoji_opts=ri:tpvs:epvs
      _ble_util_c2w=(
        # These are full-width characters that can become emoji; they are not
        # supposed to become half-width.
        [0x3030]=1 [0x303D]=1 [0x3297]=1 [0x3299]=1
        # These are the modifiers that change the skin tone, but their width
        # when used standalone is 2 in many terminals.
        [0x1F3FB]=0 [0x1F3FC]=0 [0x1F3FD]=0 [0x1F3FE]=0 [0x1F3FF]=0
      )
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (rlogin)
    (
      echo RLogin
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=12.1
      bleopt grapheme_cluster=extended
      bleopt emoji_opts=ri:zwj
      _ble_util_c2w=(
        # These are unqualified, but many terminals seem to specially give
        # them width 2.
        [0x1F202]=2 [0x1F237]=2

        # These are the modifiers that change the skin tone, but their width
        # when used standalone is 2 in many terminals.
        [0x1F3FB]=0 [0x1F3FC]=0 [0x1F3FD]=0 [0x1F3FE]=0 [0x1F3FF]=0
      )
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (alacritty)
    (
      echo Alacritty
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=13.1
      bleopt emoji_opts=ri
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      _ble_util_c2w=([0x1F202]=2 [0x1F237]=2)
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (vte|urxvt)
    (
      echo 'vte (GNOME terminal, terminator) / urxvt...'
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=12.1
      bleopt emoji_opts=ri
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      # These are unqualified, but vte seems to specially give them width 2.
      _ble_util_c2w=([0x1F202]=2 [0x1F237]=2)
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (mintty)
    (
      echo mintty...
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=11.0
      bleopt emoji_opts=ri
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      _ble_util_c2w=([0x1F202]=2 [0x1F237]=2)
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (konsole|st)
    (
      echo "$scheme..."
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=11.0
      bleopt emoji_opts=
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      _ble_util_c2w=(
        # These are unqualified, but vte seems to specially give them width 2.
        [0x1F202]=2 [0x1F237]=2
      )
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (mlterm)
    (
      echo mlterm... # (Does not match at all.  Unqualified ones seem to be treated as emoji, too.)
      bleopt char_width_mode=east
      bleopt emoji_width=2
      bleopt emoji_version=11.0
      bleopt grapheme_cluster=extended
      bleopt emoji_opts=ri
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      _ble_unicode_GraphemeClusterBreak[0x0200D]=$_ble_unicode_GraphemeClusterBreak_Other
      # Unicode tags
      make/canvas.emoji/measure-blesh/.tags-as-other
      _ble_util_c2w=(
        # ZWJ gets width 1
        [0x0200D]=1
        # These are unqualified, but many terminals seem to specially give
        # them width 2.
        [0x1F202]=2 [0x1F237]=2
        # mlterm gives width 2 to only a part of the unqualified characters.
        [0x26F0]=2 [0x26F1]=2 [0x26F4]=2 [0x26F7]=2 [0x26F8]=2 [0x26F9]=2
        [0x26C8]=2 [0x26CF]=2 [0x26D1]=2 [0x26D3]=2 [0x26E9]=2
        [0x1F170]=2 [0x1F171]=2 [0x1F17E]=2 [0x1F17F]=2
      )
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (terminology)
    (
      echo terminology...
      bleopt char_width_mode=west
      bleopt emoji_width=2
      bleopt emoji_version=2.0
      bleopt emoji_opts=unqualified:min=U+3000
      bleopt grapheme_cluster=legacy
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      _ble_unicode_GraphemeClusterBreak[0x0200D]=$_ble_unicode_GraphemeClusterBreak_Other
      _ble_unicode_GraphemeClusterBreak[0x020E3]=$_ble_unicode_GraphemeClusterBreak_Other
      # Unicode tags
      make/canvas.emoji/measure-blesh/.tags-as-other
      _ble_util_c2w=(
        # These are unqualified, but vte seems to specially give them width 2.
        [0x1F202]=2 [0x1F237]=2
      )
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (xterm)
    (
      echo xterm...
      bleopt char_width_mode=west
      bleopt emoji_width=
      #bleopt emoji_version=11.0
      bleopt emoji_opts=
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      _ble_util_c2w=(
        [0x1F202]=2 [0x1F237]=2

        #[0x1F3FB]=2 [0x1F3FC]=2 [0x1F3FD]=2 [0x1F3FE]=2 [0x1F3FF]=2
        [0x1F3FB]=1 [0x1F3FC]=1 [0x1F3FD]=1 [0x1F3FE]=1 [0x1F3FF]=1

        [0x1F9AF]=2
        [0x1F9B0]=2
        [0x1F9B1]=2
        [0x1F9B2]=2
        [0x1F9B3]=2
        [0x1F9BC]=2
        [0x1F9BD]=2
      )
      # Unicode tags
      make/canvas.emoji/measure-blesh/.tags-as-other
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (screen)
    (
      echo screen...
      bleopt char_width_mode=emacs
      bleopt emoji_width=
      bleopt emoji_opts=
      make/canvas.emoji/measure-blesh/.modifiers-as-pictographic
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (contra)
    (
      echo contra
      bleopt char_width_mode=emacs
      bleopt emoji_width=
      bleopt grapheme_cluster=
      ble/unicode/test-emoji-sequence-width "$term" "$scheme"
    ) ;;

  (*)
    # Report an unknown (or missing) scheme instead of silently doing nothing.
    echo "make/canvas.emoji/sub:measure-blesh: unknown scheme '$scheme'" >&2
    return 2 ;;

  esac
}

#------------------------------------------------------------------------------

## @fn make/canvas.emoji/sub:dump-EmojiStatus
##   For each single-code-point entry of the emoji data, print "U+XXXXX N"
##   where N is the value returned by ble/unicode/EmojiStatus.
function make/canvas.emoji/sub:dump-EmojiStatus {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data || return 1

  local line words code ret
  for line in "${emoji_data[@]}"; do
    ble/string#split-words words "$line"

    # Only process the entries made of a single code point
    ((${#words[@]}==1)) || continue
    ((code=16#${words[0]}))
    ble/unicode/EmojiStatus "$code"
    printf 'U+%05X %d\n' "$code" "$ret"
  done
}


# Dispatch to the subcommand named by the first argument; show the help when
# the name is missing or unknown.
if declare -F "make/canvas.emoji/sub:$1" &>/dev/null; then
  "make/canvas.emoji/sub:$1" "${@:2}"
else
  make/canvas.emoji/sub:help
fi