Comparison of PHP UTF-8 functions

For phputf8 and Patchwork, the PHP implementation with the least dependency on extensions was chosen.

Performance comparison

Short string (ASCII: 20 Bytes, UTF-8: 27 Bytes)

Each test consists of 100000 calls.

Function PHP Native PHP Multibyte PHP iconv phputf8 Patchwork PECL UTF-8
strlen - ASCII 0.9466 (0.1172) 1.5778 (0.1953) 1.7178 (0.2126) 2.8843 (0.3570) 12.7971 (1.5839)** 1.0000 (0.1238)
strlen - UTF-8 0.9717 (0.1223)* 1.5399 (0.1938) 1.7321 (0.2180) 2.9603 (0.3726) 13.6139 (1.7135)** 1.0000 (0.1259)
substr - ASCII 0.9805 (0.1440) 1.4165 (0.2081) 2.3135 (0.3398) 11.0357 (1.6209) 20.8770 (3.0664) 1.0000 (0.1469)
substr - UTF-8 0.9160 (0.1422)* 1.3481 (0.2092) 2.1736 (0.3374) 10.5718 (1.6409) 20.9385 (3.2500) 1.0000 (0.1552)
str_split - ASCII 0.9728 (0.3396) -- -- 4.9798 (1.7385) 5.8519 (2.0429) 1.0000 (0.3491)
str_split - UTF-8 1.1189 (0.4052)* -- -- 4.8584 (1.7596) 12.8865 (4.6672) 1.0000 (0.3622)
strrev - ASCII 1.0097 (0.1269) -- -- 10,897,314,169.2360 (1,369,355,873.2941) 21.3053 (2.6772) 1.0000 (0.1257)
strrev - UTF-8 0.8905 (0.1225)* -- -- 9,953,960,331.3119 (1,369,355,877.6209) 38.6767 (5.3207) 1.0000 (0.1376)
strpos - ASCII 0.6084 (0.1373) 1.0308 (0.2326) 1.0711 (0.2417) 10.9561 (2.4722) 19.1746 (4.3268) 1.0000 (0.2256)
strpos - UTF-8 1.4772 (0.2270)* 2.2944 (0.3527) 1.8901 (0.2905) 16.2507 (2.4978) 27.5169 (4.2295) 1.0000 (0.1537)
strrpos - ASCII 0.8194 (0.1258) 1.5077 (0.2315) 1.7862 (0.2743) 7.3825 (1.1336) 25.7626 (3.9561) 1.0000 (0.1536)
strrpos - UTF-8 0.8088 (0.1241)* 1.5198 (0.2333) 1.7648 (0.2709) 7.4996 (1.1510) 25.3784 (3.8951) 1.0000 (0.1535)
ord - ASCII 0.9529 (0.1109) -- -- 2.1545 (0.2508) 4.4249 (0.5151) 1.0000 (0.1164)
ord - UTF-8 0.9421 (0.1117)* -- -- 3.8167 (0.4524) 4.7754 (0.5660) 1.0000 (0.1185)
chr - ASCII 1.0246 (0.1182) -- -- -- 2.1745 (0.2508) 1.0000 (0.1153)
chr - UTF-8 1.0373 (0.1196)* -- -- -- 3.5397 (0.4082) 1.0000 (0.1153)
string_is_ascii - ASCII -- -- -- 3.8476 (0.4311) -- 1.0000 (0.1120)
string_is_ascii - UTF-8 -- -- -- 3.0182 (0.3308) -- 1.0000 (0.1096)
strip_non_ascii - ASCII -- -- -- 10.1951 (1.2196) -- 1.0000 (0.1196)
strip_non_ascii - UTF-8 -- -- -- 72.2683 (8.7419) -- 1.0000 (0.1210)
utf8_is_valid - ASCII -- -- -- 34.2569 (3.8955)*** 2.6152 (0.2974) 1.0000 (0.1137)
utf8_is_valid - UTF-8 -- -- -- 44.6710 (6.7289)*** 2.0202 (0.3043) 1.0000 (0.1506)
utf8_encode - ASCII 1.4058 (0.1666) 3.2053 (0.3798) 2.2857 (0.2709) -- 22.2931 (2.6417) 1.0000 (0.1185)
utf8_decode - ASCII 1.1412 (0.1449) 3.0285 (0.3846) 2.1327 (0.2709) -- 23.3210 (2.9619) 1.0000 (0.1270)
utf8_decode - UTF-8 1.1213 (0.1428) 2.9909 (0.3810) 2.1262 (0.2709) -- 43.7900 (5.5785) 1.0000 (0.1274)

Medium string (ASCII: 0.95 Kilobytes, UTF-8: 0.96 Kilobytes)

Each test consists of 10000 calls.

Function PHP Native PHP Multibyte PHP iconv phputf8 Patchwork PECL UTF-8
strlen - ASCII 0.2705 (0.0116) 1.3705 (0.0590) 5.0194 (0.2161) 2.0242 (0.0871) 135.0205 (5.8126)** 1.0000 (0.0430)
strlen - UTF-8 0.2915 (0.0126)* 1.3189 (0.0571) 5.0530 (0.2189) 1.9617 (0.0850) 132.6307 (5.7465)** 1.0000 (0.0433)
substr - ASCII 0.3038 (0.0139) 0.4653 (0.0213) 5.0141 (0.2291) 4.8280 (0.2206) 127.9646 (5.8462) 1.0000 (0.0457)
substr - UTF-8 0.3070 (0.0140)* 0.4544 (0.0207) 5.0923 (0.2322) 4.9057 (0.2236) 126.6297 (5.7729) 1.0000 (0.0456)
str_split - ASCII 1.2049 (2.7218) -- -- 1.8236 (4.1195) 3.0732 (6.9423) 1.0000 (2.2589)
str_split - UTF-8 0.7714 (2.2274)* -- -- 1.4916 (4.3069) 2.6768 (7.7293) 1.0000 (2.8875)
strrev - ASCII 0.3429 (0.0206) -- -- 22,840,138,410.2489 (1,369,356,013.6159) 141.0590 (8.4570) 1.0000 (0.0600)
strrev - UTF-8 0.3253 (0.0213)* -- -- 20,945,532,686.2121 (1,369,356,027.4646) 139.9636 (9.1504) 1.0000 (0.0654)
strpos - ASCII 0.3220 (0.0146) 1.4089 (0.0640) 0.5703 (0.0259) 6.1827 (0.2808) 133.1242 (6.0464) 1.0000 (0.0454)
strpos - UTF-8 0.2769 (0.0128)* 1.3164 (0.0611) 0.5009 (0.0232) 6.0759 (0.2819) 128.0609 (5.9415) 1.0000 (0.0464)
strrpos - ASCII 0.1494 (0.0124) 0.8301 (0.0687) 4.2721 (0.3536) 3.2034 (0.2651) 70.8232 (5.8615) 1.0000 (0.0828)
strrpos - UTF-8 0.1844 (0.0145)* 1.0650 (0.0838) 6.2882 (0.4948) 5.2689 (0.4146) 77.0828 (6.0659) 1.0000 (0.0787)
string_is_ascii - ASCII -- -- -- 34.8952 (0.6791) -- 1.0000 (0.0195)
string_is_ascii - UTF-8 -- -- -- 8.3526 (0.1090) -- 1.0000 (0.0130)
strip_non_ascii - ASCII -- -- -- 4.3451 (0.1548) -- 1.0000 (0.0356)
strip_non_ascii - UTF-8 -- -- -- 73.6971 (2.6518) -- 1.0000 (0.0360)
utf8_is_valid - ASCII -- -- -- 408.6385 (17.7537)*** 0.9505 (0.0413) 1.0000 (0.0434)
utf8_is_valid - UTF-8 -- -- -- 423.4731 (18.9724)*** 0.9212 (0.0413) 1.0000 (0.0448)
utf8_encode - ASCII 1.4536 (0.0494) 3.4096 (0.1158) 14.5670 (0.4948) -- 305.9687 (10.3938) 1.0000 (0.0340)
utf8_decode - ASCII 1.2989 (0.0659) 2.6902 (0.1364) 9.7602 (0.4948) -- 261.4316 (13.2546) 1.0000 (0.0507)
utf8_decode - UTF-8 1.1763 (0.0650) 2.5426 (0.1405) 8.9542 (0.4948) -- 245.7082 (13.5788) 1.0000 (0.0553)

Long string (ASCII: 47.94 Kilobytes, UTF-8: 48.35 Kilobytes)

Each test consists of 100 calls.

Function PHP Native PHP Multibyte PHP iconv phputf8 Patchwork PECL UTF-8
strlen - ASCII 0.0104 (0.0002) 1.2346 (0.0198) 6.3561 (0.1021) 1.7312 (0.0278) 183.3722 (2.9448)** 1.0000 (0.0161)
strlen - UTF-8 0.0068 (0.0001)* 1.2230 (0.0199) 6.2024 (0.1008) 1.6152 (0.0262) 178.6731 (2.9036)** 1.0000 (0.0163)
substr - ASCII 0.0085 (0.0001) 0.0126 (0.0002) 6.2586 (0.1013) 1.9523 (0.0316) 176.4845 (2.8554) 1.0000 (0.0162)
substr - UTF-8 0.0083 (0.0001)* 0.0124 (0.0002) 6.2445 (0.1016) 1.9793 (0.0322) 174.1639 (2.8340) 1.0000 (0.0163)
str_split - ASCII 0.9610 (1.7484) -- -- 1.5997 (2.9104) 2.4062 (4.3776) 1.0000 (1.8193)
str_split - UTF-8 0.9480 (1.6338)* -- -- 1.6284 (2.8062) 2.6702 (4.6017) 1.0000 (1.7233)
strrev - ASCII 0.1658 (0.0039) -- -- 58,604,711,080.1106 (1,369,356,180.3568) 231.1940 (5.4021) 1.0000 (0.0234)
strrev - UTF-8 0.1603 (0.0041)* -- -- 53,824,923,792.6194 (1,369,356,189.5225) 221.4237 (5.6332) 1.0000 (0.0254)
strpos - ASCII 0.0088 (0.0001) 1.2485 (0.0200) 0.0156 (0.0003) 0.5854 (0.0094) 179.9146 (2.8864) 1.0000 (0.0160)
strpos - UTF-8 0.0085 (0.0001)* 1.2126 (0.0196) 0.0150 (0.0002) 0.6193 (0.0100) 178.0043 (2.8781) 1.0000 (0.0162)
strrpos - ASCII 0.0041 (0.0001) 0.7381 (0.0237) 5.2703 (0.1695) 3.5913 (0.1155) 89.0067 (2.8629) 1.0000 (0.0322)
strrpos - UTF-8 0.0041 (0.0001)* 0.7423 (0.0242) 5.1712 (0.1688) 3.7790 (0.1233) 87.0631 (2.8411) 1.0000 (0.0326)
string_is_ascii - ASCII -- -- -- 46.0996 (0.3276) -- 1.0000 (0.0071)
string_is_ascii - UTF-8 -- -- -- 8.6298 (0.0011) -- 1.0000 (0.0001)
strip_non_ascii - ASCII -- -- -- 1.1351 (0.0145) -- 1.0000 (0.0128)
strip_non_ascii - UTF-8 -- -- -- 82.9010 (1.0029) -- 1.0000 (0.0121)
utf8_is_valid - ASCII -- -- -- 540.8401 (8.7578)*** 0.3207 (0.0052) 1.0000 (0.0162)
utf8_is_valid - UTF-8 -- -- -- 542.7542 (8.8821)*** 0.3258 (0.0053) 1.0000 (0.0164)
utf8_encode - ASCII 1.6555 (0.0179) 3.6728 (0.0398) 15.5761 (0.1688) -- 480.9922 (5.2111) 1.0000 (0.0108)
utf8_decode - ASCII 1.3127 (0.0261) 2.5812 (0.0513) 8.4872 (0.1688) -- 924.7621 (18.3870) 1.0000 (0.0199)
utf8_decode - UTF-8 1.1881 (0.0254) 2.3698 (0.0507) 7.8808 (0.1688) -- 861.2762 (18.4424) 1.0000 (0.0214)

* The native PHP implementation returns incorrect results for UTF-8 input. It is listed here only to establish a baseline.
** The same algorithm as in phputf8 is also available.
*** This algorithm works on an outdated assumption; Patchwork is more representative of what's possible in userland.

Output comparison

Function call PHP Native PHP Multibyte PHP iconv phputf8 Patchwork PECL UTF-8
strlen(Internationalisation) int(20) int(20) int(20) int(20) int(20) int(20)
strlen(Iñtërnâtiônàlizætiøn) int(27) int(20) int(20) int(20) int(20) int(20)
substr(Internationalisation, 3) string(17) "ernationalisation" string(17) "ernationalisation" string(17) "ernationalisation" string(17) "ernationalisation" string(17) "ernationalisation" string(17) "ernationalisation"
substr(Iñtërnâtiônàlizætiøn, 3) string(24) "tërnâtiônàlizætiøn" string(23) "ërnâtiônàlizætiøn" string(23) "ërnâtiônàlizætiøn" string(23) "ërnâtiônàlizætiøn" string(23) "ërnâtiônàlizætiøn" string(23) "ërnâtiônàlizætiøn"
substr(Internationalisation, 3, 7) string(7) "ernatio" string(7) "ernatio" string(7) "ernatio" string(7) "ernatio" string(7) "ernatio" string(7) "ernatio"
substr(Iñtërnâtiônàlizætiøn, 3, 7) string(7) "tërnâ" string(10) "ërnâtiô" string(10) "ërnâtiô" string(10) "ërnâtiô" string(10) "ërnâtiô" string(10) "ërnâtiô"
strrev(Internationalisation) string(20) "noitasilanoitanretnI" -- -- string(20) "noitasilanoitanretnI" string(20) "noitasilanoitanretnI" string(20) "noitasilanoitanretnI"
strrev(Iñtërnâtiônàlizætiøn) string(27) "n¸Ãit¦Ãzil Ãn´Ãit¢Ãnr«Ãt±ÃI" -- -- string(27) "nøitæzilànôitânrëtñI" string(27) "nøitæzilànôitânrëtñI" string(27) "nøitæzilànôitânrëtñI"
ord(n) int(110) -- -- int(110) int(110) int(110)
ord(ñ) int(195) -- -- int(241) int(241) int(241)
chr(110) string(1) "n" -- -- -- string(1) "n" string(1) "n"
chr(241) string(1) "ñ" -- -- -- string(2) "ñ" string(2) "ñ"
strpos(Internationalisation, 'n') int(1) int(1) int(1) int(1) int(1) int(1)
strpos(Iñtërnâtiônàlizætiøn, 'ñ') int(1) int(1) int(1) int(1) int(1) int(1)
strpos(Internationalisation, 'n', 11) int(19) int(19) int(19) int(19) int(19) int(19)
strpos(Iñtërnâtiônàlizætiøn, 'n', 11) int(14) int(19) int(19) int(19) int(19) int(19)
strrpos(Internationalisation, 'n') int(19) int(19) int(19) int(19) int(20) int(19)
strrpos(Iñtërnâtiônàlizætiøn, 'n') int(26) int(19) int(19) int(19) int(20) int(19)
strrpos(Internationalisation, 'n', 11) int(19) int(19) -- int(19) int(9) int(19)
strrpos(Iñtërnâtiônàlizætiøn, 'n', 11) int(26) int(19) -- int(19) int(9) int(19)