....................................../////.===Hehe-Here===./////................................................ > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < > < ------------------------------------------------------------------------------------------------------------------- /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// RIFF¤ WEBPVP8 ˜ ðÑ *ôô>‘HŸK¥¤"§£±¨àð enü¹%½_F‘åè¿2ºQú³íªú`N¿­3ÿƒügµJžaÿ¯ÿ°~¼ÎùnúîÞÖô•òíôÁÉß®Sm¥Ü/ ‡ó˜f£Ùà<˜„xëJ¢Ù€SO3x<ªÔ©4¿+ç¶A`q@Ì“Úñè™ÍÿJÌ´ª-˜ÆtÊÛL]Ïq*‘Ý”ì#ŸÌÏãY]@ê`¿ /ªfkØB4·®£ó z—Üw¥Pxù–ÞLШKÇN¾AkÙTf½è'‰g gÆv›Øuh~ a˜Z— ïj*á¥t d£“uÒ ¨`K˜¹ßþ]b>˜]_ÏÔ6W—è2r4x•íÖ…"ƒÖNîä!¦å Ú}ýxGøÌ —@ ;ÆÚŠ=ɾ1ý8lªË¥ô ^yf®Œ¢u&2©nÙÇ›ñÂñŒ³ aPo['½»øFùà­+4ê“$!lövlüÞ=;N®3ð‚õ›DÉKòÞ>ÄÍ ¥ˆuߤ#ˆ$6ù™¥îЇy’ÍB¼ çxÛ;X"WL£R÷͝*ó-¶Zu}º.s¸sšXqù–DþÿvªhüïwyŸ ¯é³lÀ:KCûÄ£Ëá\…­ ~—ýóî ¼ûûÜTÓüÇy…ŽÆvc»¾×U ñ¸žþоP÷¦ó:Ò¨¨5;Ð#&#ÖúñläÿÁœ GxÉ­/ñ‡áQðìYÉtÒw޼GÔ´zàÒò ð*ëzƒ•4~H]Ø‹f ñÓÈñ`NåWçs'ÆÏW^ø¹!XžµmQ5ÃËoLœÎ: ÞËÍ¥J ù…î èo£ßPÎñ¶ž8.Œ]ʵ~5›ÙË-ù*8ÙÖß±~ ©¹rÓê‚j¶d¸{^Q'˜±Crß ÚH—#¥¥QlÀ×ëã‡DÜ«èî þ&Çæžî;ŽÏºò6ÒLÃXy&ZŒ'j‚¢Ù€IßÚù+–MGi‰*jE€‘JcÜ ÓÌ EÏÚj]o˜ Þr <¾U ûŪæÍ/šÝH¥˜b”¼ ÁñßX GP›ï2›4WŠÏà×£…íÓk†¦H·ÅíMh–*nó÷à]ÁjCº€b7<ب‹¨5車bp2:Á[UªM„QŒçiNMa#<5›áËó¸HýÊ"…×Éw¹¦ì2º–x<›»a±¸3Weü®FÝ⑱ö–î–³|LPÈ~çð~Çå‡|º kD¢µÏàÆAI %1À% ¹Ò – ”ϝS¦‰4&¶£°à Öý”û_Ò Áw°A«Å€?mÇÛgHÉ/8)á¾ÛìáöŽP í¨PŸNÙµº¦‡§Ùš"ÿ«>+ªÕ`Ê÷‡‚ß Õû˜þãÇ-PÍ.¾XV‘€ dÜ"þ4¹ ±Oú‘©t¥¦FªÄÃÄ•b‚znýu½—#cDs˜ÃiÑOˆñ×QO=*IAÊ,¶ŽZƒ;‡wøXè%EÐk:F±Ú” .Ѽ+Áu&Ç`."pÈÉw o&¿dE6‘’EqTuK@Ì¥ã™À(Êk(h‰,H}RÀIXÛš3µ1©_OqÚÒJAñ$ÊÙÜ;D3çŒ[þùœh¬Ã³™ö6ç†NY".Ú‰ï[ªŸŒ '²Ð öø_¨ÂÉ9ué¶³ÒŠõTàîMØ#û¯gN‡bÙ놚X„ö …ÉeüÌ^J ‹€.œ$Æ)βÄeæW#óüßĺŸ€ ÀzwV 9oä»f4V*uB «Ë†¹ì¯žR霓æHXa=&“I4K;¯ç‹h×·"UŠ~<•╪Vêª&ÍSÃÆÅ?ÔqÎ*mTM ˜›µwêd#[C¡©§‘D<©àb†–ÁœøvH/,í:¯( ²£|4-„Æövv„Yͼ™^Á$ˆ„¢Û[6yB.åH*V¨æ?$=˜Ñ€•ñ·­(VlŸ‘ nÀt8W÷´Bûba?q9ú¶Xƒl«ÿ\ù¶’þòUÐj/õ¢Ìµ³g$ƒÎR!¸»|Oߍë’BhîÚÑ¢ñåŒJ„®„£2Ð3•ô02Nt…!£Í]Ïc½Qÿ?ˆ<&ÃA¾Ú,JˆijÌ#5yz„‰Î|ÊŽ5QÏ:‹ÐaóVÔxW—CpeÏzÐïíçôÿÅ_[hãsÐ_/ŽTÝ?BîˆííV$<¿i>²F¬_Eß¿ †bÊŒº­ÿ®Z H“C}”¬,Mp ý/Bá£w>˜YV°aƒúh+cŠ- r/[%|üUMHäQ°X»|û/@|°¥Ð !BÔ Ç¢Ä©š+Õì D«7ìN¶ŽðÔ " ƶ’ÖçtA‰Û×}{tþz­¾GÍ›k¹OEJR$ Â׃ «ëÁ"oÉôž$oUK(Ä)Ãz³Ê-‹êN[Ò3Œñbï8P 4ƒ×q¢bo|?<ÛX¬òÄͰL–±›(™ûG?ýË©ÚÄ–ÂDØÐ_Ç¡ô ¾–ÄÏø ×e8Ë©$ÄF¹Å‹ì[©óìl:F¾f´‹‹Xì²ï®\¬ôùƒ ÿat¥óèÒùHß0äe‚;ü×h:ÆWðHž=Ã8骣"kœ'Y?³}Tûè€>?0l›e1Lòñ„aæKÆw…hÖŠùW…ÈÆÄ0ši·›[pcwËþñiêíY/~-Á5˜!¿†A›™Mÿþ(±“t@â“ö2­´TG5yé]çå僳 .·ÍïçÝ7UÚ±Ð/Nè»,_Ï ùdj7\ï Wì4›„»c¸àešg#ÒÊ⥭áØo5‘?ÌdÝô¯ ¹kzsƒ=´#ëÉK›Ø´±-¥eW?‡çßtòTã…$Ý+qÿ±ƒ÷_3Ô¥í÷:æ–ž<·Ö‡‰Å¢ š‡%Ô—utÌÈìðžgÖÀz²À—ï÷Óîäõ{K'´È÷³yaÏÁjƒô}ž§®æÊydÕÈë5¯èˆõvÕ©ã*çD„ “z„Ó‡^^xÂ3M§A´JG‚öï 3W'ˆ.OvXè¡ÊÕª?5º7†˜(˜Ç¶#çê’¶!ÌdZK§æ 0fãaN]òY³RV ™î$®K2R¨`W!1Ôó\;Ý ýB%qæK•&ÓÈe9È0êI±žeŸß -ú@žQr¦ ö4»M¼Áè¹µmw 9 EÆE_°2ó„ŸXKWÁ×Hóì^´²GѝF©óäR†¦‰ç"V»eØ<3ùd3ÿÚ¤Žú“Gi" —‘_ÙËÎ~Üö¯¥½Î»üŸEÚŽåmÞþí ;ÞólËΦMzA"Âf(´òá;Éï(/7½ûñÌ­cïÕçлþÝz¾-ÍvÑ“pH­–ðÓj$¸Äû¤‚‘ãUBË-n“2åPkS5&‹Â|+g^œ®Ì͆d!OïäîU«c;{Û!ÅŽ«ëZ9Ókóˆ]¯ƒ›né `ÇÒ+tÆš (ØKá¾—=3œ®•vuMñg²\ï Ec€ 05±d™‡×iÇ×›UúvÌ¢£Èþ¡ÕØô¶ßÎA"ß±#Ö²ˆÊŸ¦*Ä~ij|àø.-¼'»Ú¥£h ofº¦‡VsR=N½„Î v˜Z*SÌ{=jÑB‹tê…;’HžH¯8–îDù8ñ¢|Q•bÛçš–‹m³“ê¨ åÏ^m¬Žãþ©ïêO‡½6] µÆ„Ooòü ²x}N¦Ë3ïé¿»€›HA˜m%çÞ/¿í7Fø“‹léUk)É°Œµ8Q8›:ÀŠeT*šõ~ôڝG6 ¢}`ùH­–”¡k ‰P1>š†®9z11!X wKfmÁ¦xÑ,N1Q”–æB¶M…ÒÃv6SMˆhU¬ÊPŽï‘öj=·CŒ¯u¹ƒVIЃsx4’ömÛýcå¡¶7ßŠß 57^\wÒÐÆ k§h,Œý î«q^R½3]J¸ÇðN ‚çU¬ôº^Áì} ³f©Õœ§ˆã:FÄÈ‚é(€™?àýÓüè1Gô£¼éj‚OÅñ  #>×—ßtà 0G¥Åa뀐kßhc™À_ÉñÞ#±)GD" YîäË-ÿÙ̪ ¹™a¯´¢E\ÝÒö‚;™„ë]_ p8‰o¡ñ+^÷ 3‘'dT4œŽ ðVë½° :¬víÑ«£tßÚS-3¶“þ2 †üüʨòrš¹M{É_¤`Û¨0ìjœøJ‡:÷ÃáZ˜†@GP&œÑDGÏs¡þ¦þDGú‘1Yá9Ôþ¼ ûø…§÷8&–ÜÑnÄ_m®^üÆ`;ÉVÁJ£?â€-ßê}suÍ2sõA NÌúA磸‘îÿÚ»ƒìö·á¿±tÑÐ"Tÿü˜[@/äj¬€uüªìù¥Ý˜á8Ý´sõj 8@rˆð äþZÇD®ÿUÏ2ùôõrBzÆÏÞž>Ì™xœ“ wiÎ×7_… ¸ \#€MɁV¶¥üÕÿPÔ9Z‡ø§É8#H:ƒ5ÀÝå9ÍIŒ5åKÙŠ÷qÄ>1AÈøžj"µÂд/ªnÀ qªã}"iŸBå˜ÓÛŽ¦…&ݧ;G@—³b¯“•"´4í¨ôM¨åñC‹ïùÉó¯ÓsSH2Ý@ßáM‡ˆKÀªÛUeø/4\gnm¥‹ŸŒ qÄ b9ÞwÒNÏ_4Ég³ú=܆‚´ •â¥õeíþkjz>éÚyU«Íӝ݃6"8/ø{=Ô¢»G¥ äUw°W«,ô—¿ãㆅү¢³xŠUû™yŒ (øSópÐ 9\åTâ»—*oG$/×ÍT†Y¿1¤Þ¢_‡ ¼ „±ÍçèSaÓ 3ÛMÁBkxs‰’R/¡¤ˆÙçª(*õ„üXÌ´ƒ E§´¬EF"Ù”R/ÐNyÆÂ^°?™6¡œïJ·±$§?º>ÖüœcNÌù¯G ‹ñ2ЁBB„^·úìaz¨k:#¨Æ¨8LÎõލ£^§S&cŒÐU€ü(‡F±Š¼&P>8ÙÁ ‰ p5?0ÊÆƒZl¸aô š¼¡}gÿ¶zÆC²¹¬ÎÖG*HB¡O<º2#ñŒAƒ–¡B˜´É$¥›É:FÀÔx¾u?XÜÏÓvN©RS{2ʈãk9rmP¼Qq̳ è¼ÐFׄ^¡Öì fE“F4A…!ì/…¦Lƒ… … $%´¾yã@CI¬ á—3PþBÏNÿ<ý°4Ü ËÃ#ØÍ~âW«rEñw‹eùMMHß²`¬Öó½íf³:‹k˜¯÷}Z!ã¿<¥,\#öµÀ¯aÒNÆIé,Ћ–lŽ#Àæ9ÀÒS·I’½-Ïp Äz¤Š Â* ­íÄ9­< h>׍3ZkËU¹§˜ŒŠ±f­’¤º³Q ÏB?‹#µíÃ¥®@(Gs«†vI¥Mµ‹Á©e~2ú³ÁP4ìÕi‚²Ê^ö@-DþÓàlÜOÍ]n"µã:žpsŽ¢:! Aõ.ç~ÓBûH÷JCÌ]õVƒd «ú´QÙEA–¯¯Œ!.ˆˆëQ±ù œ·Ì!Õâ )ùL„ÅÀlÚè5@B…o´Æ¸XÓ&Û…O«˜”_#‡ƒ„ûÈt!¤ÁÏ›ÎÝŠ?c9 â\>lÓÁVÄÑ™£eØY]:fÝ–—ù+p{™ðè û³”g±OƒÚSù£áÁÊ„ä,ï7š²G ÕÌBk)~ÑiCµ|h#u¤¶îK¨² #²vݯGãeÖ϶ú…¾múÀ¶þÔñ‚Š9'^($¤§ò “š½{éúp÷J›ušS¹áªCÂubÃH9™D™/ZöØÁ‡¦ÝÙŸ·kð*_”.C‹{áXó€‡c¡c€§/šò/&éš÷,àéJþ‰X›fµ“C¨œ®r¬"kL‰Â_q…Z–.ÉL~O µ›zn‚¹À¦Öª7\àHµšÖ %»ÇníV[¥*Õ;ƒ#½¾HK-ÖIÊdÏEÚ#=o÷Óò³´Š: Ç?{¾+9›–‘OEáU·S€˜j"ÄaÜ ŒÛWt› á–c#a»pÔZÞdŽtWê=9éöÊ¢µ~ ë ;Öe‡Œ®:bî3±ýê¢wà¼îpêñ¹¾4 zc¾ðÖÿzdêŒÑÒŝÀ‰s6¤í³ÎÙB¿OZ”+F¤á‡3@Ñëäg©·Ž ˆèª<ù@É{&S„œÕúÀA)‰h:YÀ5^ÂÓŒ°õäU\ ùËÍû#²?Xe¬tu‰^zÒÔãë¼ÛWtEtû …‚g¶Úüâî*moGè¨7%u!]PhÏd™Ý%Îx: VÒ¦ôÊD3ÀŽKÛËãvÆî…N¯ä>Eró–ð`5 Œ%u5XkñÌ*NU%¶áœÊ:Qÿú»“úzyÏ6å-၇¾ ´ ÒÊ]y žO‘w2Äøæ…H’²f±ÎÇ.ª|¥'gîV•Ü .̘¯€šòü¤U~Ù†*¢!?ò wý,}´°ÔÞnïoKq5µb!áÓ3"vAßH¡³¡·G(ÐÎ0Îò¼MG!/ài®@—¬04*`…«é8ªøøló“ˆÊ”èù¤…ßÊoÿé'ËuÌÖ5×È¡§ˆˆfŽë9}hìâ_!!¯  B&Ëö¶‰ÀAÙNVŸ Wh›¸®XÑJì¨ú“¿÷3uj²˜¨ÍÎìë±aúŠÝå¯ð*Ó¨ôJ“yºØ)m°WýOè68†ŸÏ2—‰Ïüꪫٚ¥‹l1 ø ÏÄFjêµvÌbü¦èÝx:X±¢H=MÐß—,ˆÉÇ´(9ú¾^ÅÚ4¿m‡$âX‘å%(AlZo@½¨UOÌÕ”1ø¸jÎÀÃÃ_ µ‘Ü.œº¦Ut: Æï’!=¯uwû#,“pþÇúŒø(é@?³ü¥‘Mo §—s@Œ#)§ŒùkL}NOÆêA›¸~r½¼ÙA—HJ«eˆÖ´*¡ÓpÌŸö.m<-"³ûÈ$¬_6­åf£ïÚâj1y§ÕJ½@dÞÁr&Í\Z%D£Íñ·AZ Û³øüd/ªAi†/Й~  ‡âĮҮÏh§°b—›Û«mJžòG'[ÈYýŒ¦9psl ýÁ ®±f¦x,‰½tN ‚Xª9 ÙÖH.«Lo0×?͹m¡å†Ѽ+›2ƒF ±Ê8 7Hցϓ²Æ–m9…òŸï]Â1äN†VLâCˆU .ÿ‰Ts +ÅÎx(%¦u]6AF Š ØF鈄‘ |¢¶c±soŒ/t[a¾–û:s·`i햍ê›ËchÈ…8ßÀUÜewŒðNOƒõD%q#éû\9¤x¹&UE×G¥ Í—™$ð E6-‡¼!ýpãÔM˜ Âsìe¯ñµK¢Ç¡ùôléœ4Ö£”À Š®Ðc ^¨À}ÙËŸ§›ºê{ÊuÉC ×Sr€¤’fÉ*j!úÓ’Gsùìoîßîn%ò· àc Wp÷$¨˜)û»H ×8ŽÒ€Zj¤3ÀÙºY'Ql¦py{-6íÔCeiØp‘‡XÊîÆUߢ܂ž£Xé¼Y8þ©ëgñß}é.ÎógÒ„ÃØËø¯»™§Xýy M%@NŠ À(~áÐvu7&•,Ù˜ó€uP‡^^®=_E„jt’ 403WebShell
403Webshell
Server IP : 159.198.67.129  /  Your IP : 216.73.216.244
Web Server : LiteSpeed
System : Linux server166.web-hosting.com 4.18.0-513.18.1.lve.el8.x86_64 #1 SMP Thu Feb 22 12:55:50 UTC 2024 x86_64
User : trooaisr ( 4033)
PHP Version : 7.4.33
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : OFF  |  Pkexec : OFF
Directory :  /proc/self/root/opt/alt/php85/usr/include/php/Zend/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /proc/self/root/opt/alt/php85/usr/include/php/Zend//zend_simd.h
/********************************************************************************
 * MIT License
 * Copyright (c) 2025 Saki Takamachi <saki@sakiot.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *********************************************************************************/


 #ifndef XSSE_H
 #define XSSE_H

 #define XSSE_VERSION 10000

 #ifdef _MSC_VER
 #  define XSSE_FORCE_INLINE __forceinline
 #elif defined(__GNUC__) || defined(__clang__)
 #  define XSSE_FORCE_INLINE inline __attribute__((always_inline))
 #  define XSSE_HAS_MACRO_EXTENSION
 #else
 #  define XSSE_FORCE_INLINE inline
 #endif


 #if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64)
 #include <emmintrin.h>
 #define XSSE2


 #elif defined(__aarch64__) || defined(_M_ARM64)
 #include <arm_neon.h>
 #define XSSE2

 typedef int8x16_t __m128i;


 /*****************************************************************************
  * Load / Store                                                              *
  *****************************************************************************/

 #define _mm_set_epi8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) \
	 ((int8x16_t) { \
		 (int8_t) (x15), (int8_t) (x14), (int8_t) (x13), (int8_t) (x12), \
		 (int8_t) (x11), (int8_t) (x10), (int8_t) (x9),  (int8_t) (x8), \
		 (int8_t) (x7),  (int8_t) (x6),  (int8_t) (x5),  (int8_t) (x4), \
		 (int8_t) (x3),  (int8_t) (x2),  (int8_t) (x1),  (int8_t) (x0) })
 #define _mm_set_epi16(x0, x1, x2, x3, x4, x5, x6, x7) \
	 (vreinterpretq_s8_s16((int16x8_t) { \
		 (int16_t) (x7), (int16_t) (x6), (int16_t) (x5), (int16_t) (x4), \
		 (int16_t) (x3), (int16_t) (x2), (int16_t) (x1), (int16_t) (x0) }))
 #define _mm_set_epi32(x0, x1, x2, x3) \
	 (vreinterpretq_s8_s32((int32x4_t) { (int32_t) (x3), (int32_t) (x2), (int32_t) (x1), (int32_t) (x0) }))
 #define _mm_set_epi64x(x0, x1) (vreinterpretq_s8_s64((int64x2_t) { (int64_t) (x1), (int64_t) (x0) }))
 #define _mm_set1_epi8(x) (vdupq_n_s8((int8_t) (x)))
 #define _mm_set1_epi16(x) (vreinterpretq_s8_s16(vdupq_n_s16((int16_t) (x))))
 #define _mm_set1_epi32(x) (vreinterpretq_s8_s32(vdupq_n_s32((int32_t) (x))))
 #define _mm_set1_epi64x(x) (vreinterpretq_s8_s64(vdupq_n_s64((int64_t) (x))))

 #define _mm_setr_epi8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) \
	 ((int8x16_t) { \
		 (int8_t) (x0), (int8_t) (x1), (int8_t) (x2), (int8_t) (x3), \
		 (int8_t) (x4), (int8_t) (x5), (int8_t) (x6), (int8_t) (x7), \
		 (int8_t) (x8), (int8_t) (x9), (int8_t) (x10), (int8_t) (x11), \
		 (int8_t) (x12), (int8_t) (x13), (int8_t) (x14), (int8_t) (x15) })
 #define _mm_setr_epi16(x0, x1, x2, x3, x4, x5, x6, x7) \
	 (vreinterpretq_s8_s16((int16x8_t) { \
		 (int16_t) (x0), (int16_t) (x1), (int16_t) (x2), (int16_t) (x3), \
		 (int16_t) (x4), (int16_t) (x5), (int16_t) (x6), (int16_t) (x7) }))
 #define _mm_setr_epi32(x0, x1, x2, x3) \
	 (vreinterpretq_s8_s32((int32x4_t) { (int32_t) (x0), (int32_t) (x1), (int32_t) (x2), (int32_t) (x3) }))

 #define _mm_setzero_si128() (vdupq_n_s8(0))

 #define _mm_load_si128(x) (vld1q_s8((const int8_t *) (x)))
 #define _mm_loadu_si128(x) _mm_load_si128(x)

 #define _mm_store_si128(to, x) (vst1q_s8((int8_t *) (to), x))
 #define _mm_storeu_si128(to, x) _mm_store_si128(to, x)
 #define _mm_stream_si128(to, x) _mm_store_si128(to, x)
 #define _mm_stream_si32(to, x) (*(volatile int32_t *)(to) = (int32_t)(x))


 /*****************************************************************************
  * Bit shift / Bit wise                                                      *
  *****************************************************************************/

 #define _mm_or_si128(a, b) (vorrq_s8((a), (b)))
 #define _mm_xor_si128(a, b) (veorq_s8((a), (b)))
 #define _mm_and_si128(a, b) (vandq_s8((a), (b)))
 #define _mm_andnot_si128(a, b) (vbicq_s8((b), (a)))

 #define _mm_slli_epi16(x, count) (vreinterpretq_s8_u16(vshlq_n_u16(vreinterpretq_u16_s8(x), (count))))
 #define _mm_slli_epi32(x, count) (vreinterpretq_s8_u32(vshlq_n_u32(vreinterpretq_u32_s8(x), (count))))
 #define _mm_slli_epi64(x, count) (vreinterpretq_s8_u64(vshlq_n_u64(vreinterpretq_u64_s8(x), (count))))
 static XSSE_FORCE_INLINE __m128i _mm_sll_epi16(__m128i x, __m128i count)
 {
	 uint16_t shift = (uint16_t) (vgetq_lane_s64(vreinterpretq_s64_s8(count), 0) & 0xFFFF);
	 return vreinterpretq_s8_u16(
		 vshlq_u16(vreinterpretq_u16_s8(x), vdupq_n_s16((int16_t) shift))
	 );
 }
 static XSSE_FORCE_INLINE __m128i _mm_sll_epi32(__m128i x, __m128i count)
 {
	 uint32_t shift = (uint32_t) (vgetq_lane_s64(vreinterpretq_s64_s8(count), 0) & 0xFFFFFFFF);
	 return vreinterpretq_s8_u32(
		 vshlq_u32(vreinterpretq_u32_s8(x), vdupq_n_s32((int32_t) shift))
	 );
 }
 static XSSE_FORCE_INLINE __m128i _mm_sll_epi64(__m128i x, __m128i count)
 {
	 uint64_t shift = (uint64_t) vgetq_lane_s64(vreinterpretq_s64_s8(count), 0);
	 return vreinterpretq_s8_u64(
		 vshlq_u64(vreinterpretq_u64_s8(x), vdupq_n_s64((int64_t) shift))
	 );
 }

 #define _mm_slli_si128(x, imm) \
	 ((imm) >= 16 ? vdupq_n_s8(0) : vreinterpretq_s8_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_s8(x), 16 - (imm))))

 #define _mm_srai_epi16(x, count) (vreinterpretq_s8_s16(vshrq_n_s16(vreinterpretq_s16_s8(x), (count))))
 #define _mm_srai_epi32(x, count) (vreinterpretq_s8_s32(vshrq_n_s32(vreinterpretq_s32_s8(x), (count))))
 static inline __m128i _mm_sra_epi16(__m128i x, __m128i count)
 {
	 uint16_t shift = (uint16_t) (vgetq_lane_s64(vreinterpretq_s64_s8(count), 0) & 0xFFFF);
	 return vreinterpretq_s8_s16(
		 vshlq_s16(vreinterpretq_s16_s8(x), vdupq_n_s16(-(int16_t) shift))
	 );
 }
 static inline __m128i _mm_sra_epi32(__m128i x, __m128i count)
 {
	 uint32_t shift = (uint32_t) (vgetq_lane_s64(vreinterpretq_s64_s8(count), 0) & 0xFFFFFFFF);
	 return vreinterpretq_s8_s32(
		 vshlq_s32(vreinterpretq_s32_s8(x), vdupq_n_s32(-(int32_t) shift))
	 );
 }

 #define _mm_srli_epi16(x, count) (vreinterpretq_s8_u16(vshrq_n_u16(vreinterpretq_u16_s8(x), (count))))
 #define _mm_srli_epi32(x, count) (vreinterpretq_s8_u32(vshrq_n_u32(vreinterpretq_u32_s8(x), (count))))
 #define _mm_srli_epi64(x, count) (vreinterpretq_s8_u64(vshrq_n_u64(vreinterpretq_u64_s8(x), (count))))
 static XSSE_FORCE_INLINE __m128i _mm_srl_epi16(__m128i x, __m128i count)
 {
	 uint16_t shift = (uint16_t) (vgetq_lane_s64(vreinterpretq_s64_s8(count), 0) & 0xFFFF);
	 return vreinterpretq_s8_u16(
		 vshlq_u16(vreinterpretq_u16_s8(x), vdupq_n_s16(-(int16_t) shift))
	 );
 }
 static XSSE_FORCE_INLINE __m128i _mm_srl_epi32(__m128i x, __m128i count)
 {
	 uint32_t shift = (uint32_t) (vgetq_lane_s64(vreinterpretq_s64_s8(count), 0) & 0xFFFFFFFF);
	 return vreinterpretq_s8_u32(
		 vshlq_u32(vreinterpretq_u32_s8(x), vdupq_n_s32(-(int32_t) shift))
	 );
 }
 static XSSE_FORCE_INLINE __m128i _mm_srl_epi64(__m128i x, __m128i count)
 {
	 uint64_t shift = (uint64_t) vgetq_lane_s64(vreinterpretq_s64_s8(count), 0);
	 return vreinterpretq_s8_u64(
		 vshlq_u64(vreinterpretq_u64_s8(x), vdupq_n_s64(-(int64_t) shift))
	 );
 }

 #define _mm_srli_si128(x, imm) \
	 ((imm) >= 16 ? vdupq_n_s8(0) : vreinterpretq_s8_u8(vextq_u8(vreinterpretq_u8_s8(x), vdupq_n_u8(0), (imm))))


 /*****************************************************************************
  * Integer Arithmetic Operations                                             *
  *****************************************************************************/

 /**
  * In practice, there is no problem, but a runtime error for signed integer overflow is triggered by UBSAN,
  * so perform the calculation as unsigned. Since it is optimized at compile time, there are no unnecessary casts at runtime.
  */
 #define _mm_add_epi8(a, b) (vreinterpretq_s8_u8(vaddq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_add_epi16(a, b) (vreinterpretq_s8_u16(vaddq_u16(vreinterpretq_u16_s8(a), vreinterpretq_u16_s8(b))))
 #define _mm_add_epi32(a, b) (vreinterpretq_s8_u32(vaddq_u32(vreinterpretq_u32_s8(a), vreinterpretq_u32_s8(b))))
 #define _mm_add_epi64(a, b) (vreinterpretq_s8_u64(vaddq_u64(vreinterpretq_u64_s8(a), vreinterpretq_u64_s8(b))))

 #define _mm_adds_epi8(a, b) (vqaddq_s8((a), (b)))
 #define _mm_adds_epi16(a, b) (vreinterpretq_s8_s16(vqaddq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_adds_epu8(a, b) (vreinterpretq_s8_u8(vqaddq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_adds_epu16(a, b) (vreinterpretq_s8_u16(vqaddq_u16(vreinterpretq_u16_s8(a), vreinterpretq_u16_s8(b))))

 #define _mm_avg_epu8(a, b) (vreinterpretq_s8_u8(vrhaddq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_avg_epu16(a, b) (vreinterpretq_s8_u16(vrhaddq_u16(vreinterpretq_u16_s8(a), vreinterpretq_u16_s8(b))))

 static XSSE_FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b)
 {
	 int32x4_t mul_lo = vmull_s16(vget_low_s16(vreinterpretq_s16_s8(a)), vget_low_s16(vreinterpretq_s16_s8(b)));
	 int32x4_t mul_hi = vmull_s16(vget_high_s16(vreinterpretq_s16_s8(a)), vget_high_s16(vreinterpretq_s16_s8(b)));

	 return vreinterpretq_s8_s32(vcombine_s32(
		 vpadd_s32(vget_low_s32(mul_lo), vget_high_s32(mul_lo)),
		 vpadd_s32(vget_low_s32(mul_hi), vget_high_s32(mul_hi))
	 ));
 }

 #define _mm_max_epu8(a, b) (vreinterpretq_s8_u8(vmaxq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_max_epi16(a, b) (vreinterpretq_s8_s16(vmaxq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_min_epu8(a, b) (vreinterpretq_s8_u8(vminq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_min_epi16(a, b) (vreinterpretq_s8_s16(vminq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))

 static XSSE_FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
 {
	 int32x4_t lo = vmull_s16(vget_low_s16(vreinterpretq_s16_s8(a)), vget_low_s16(vreinterpretq_s16_s8(b)));
	 int32x4_t hi = vmull_s16(vget_high_s16(vreinterpretq_s16_s8(a)), vget_high_s16(vreinterpretq_s16_s8(b)));
	 return vreinterpretq_s8_s16(vcombine_s16(vshrn_n_s32(lo, 16), vshrn_n_s32(hi, 16)));
 }
 static XSSE_FORCE_INLINE __m128i _mm_mulhi_epu16(__m128i a, __m128i b)
 {
	 uint32x4_t lo = vmull_u16(vget_low_u16(vreinterpretq_u16_s8(a)), vget_low_u16(vreinterpretq_u16_s8(b)));
	 uint32x4_t hi = vmull_u16(vget_high_u16(vreinterpretq_u16_s8(a)), vget_high_u16(vreinterpretq_u16_s8(b)));
	 return vreinterpretq_s8_u16(vcombine_u16(vshrn_n_u32(lo, 16), vshrn_n_u32(hi, 16)));
 }
 static XSSE_FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b)
 {
	 int32x4_t lo = vmull_s16(vget_low_s16(vreinterpretq_s16_s8(a)), vget_low_s16(vreinterpretq_s16_s8(b)));
	 int32x4_t hi = vmull_s16(vget_high_s16(vreinterpretq_s16_s8(a)), vget_high_s16(vreinterpretq_s16_s8(b)));
	 return vreinterpretq_s8_s16(vcombine_s16(vmovn_s32(lo), vmovn_s32(hi)));
 }
 static XSSE_FORCE_INLINE __m128i _mm_mul_epu32(__m128i a, __m128i b)
 {
	 uint32x4_t evens = vuzpq_u32(vreinterpretq_u32_s8(a), vreinterpretq_u32_s8(b)).val[0];
	 return vreinterpretq_s8_u64(vmull_u32(vget_low_u32(evens), vget_high_u32(evens)));
 }
 static XSSE_FORCE_INLINE __m128i _mm_sad_epu8(__m128i a, __m128i b)
 {
	 uint16x8_t abs_diffs_16 = vpaddlq_u8(vabdq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b)));
	 uint32x4_t abs_diffs_32 = vpaddlq_u16(abs_diffs_16);
	 uint64x2_t abs_diffs_64 = vpaddlq_u32(abs_diffs_32);

	 return vreinterpretq_s8_u16((uint16x8_t) {
		 (int16_t) vgetq_lane_u64(abs_diffs_64, 0), 0, 0, 0,
		 (int16_t) vgetq_lane_u64(abs_diffs_64, 1), 0, 0, 0
	 });
 }

 #define _mm_sub_epi8(a, b) (vreinterpretq_s8_u8(vsubq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_sub_epi16(a, b) (vreinterpretq_s8_u16(vsubq_u16(vreinterpretq_u16_s8(a), vreinterpretq_u16_s8(b))))
 #define _mm_sub_epi32(a, b) (vreinterpretq_s8_u32(vsubq_u32(vreinterpretq_u32_s8(a), vreinterpretq_u32_s8(b))))
 #define _mm_sub_epi64(a, b) (vreinterpretq_s8_u64(vsubq_u64(vreinterpretq_u64_s8(a), vreinterpretq_u64_s8(b))))

 #define _mm_subs_epi8(a, b) (vqsubq_s8((a), (b)))
 #define _mm_subs_epi16(a, b) (vreinterpretq_s8_s16(vqsubq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_subs_epu8(a, b) (vreinterpretq_s8_u8(vqsubq_u8(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))))
 #define _mm_subs_epu16(a, b) (vreinterpretq_s8_u16(vqsubq_u16(vreinterpretq_u16_s8(a), vreinterpretq_u16_s8(b))))


 /*****************************************************************************
  * Comparison                                                                *
  *****************************************************************************/

 #define _mm_cmpeq_epi8(a, b) (vreinterpretq_s8_u8(vceqq_s8((a), (b))))
 #define _mm_cmpeq_epi16(a, b) (vreinterpretq_s8_u16(vceqq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_cmpeq_epi32(a, b) (vreinterpretq_s8_u32(vceqq_s32(vreinterpretq_s32_s8(a), vreinterpretq_s32_s8(b))))

 #define _mm_cmplt_epi8(a, b) (vreinterpretq_s8_u8(vcltq_s8((a), (b))))
 #define _mm_cmplt_epi16(a, b) (vreinterpretq_s8_u16(vcltq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_cmplt_epi32(a, b) (vreinterpretq_s8_u32(vcltq_s32(vreinterpretq_s32_s8(a), vreinterpretq_s32_s8(b))))

 #define _mm_cmpgt_epi8(a, b) (vreinterpretq_s8_u8(vcgtq_s8((a), (b))))
 #define _mm_cmpgt_epi16(a, b) (vreinterpretq_s8_u16(vcgtq_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_cmpgt_epi32(a, b) (vreinterpretq_s8_u32(vcgtq_s32(vreinterpretq_s32_s8(a), vreinterpretq_s32_s8(b))))


 /*****************************************************************************
  * Convert                                                                   *
  *****************************************************************************/

 #define _mm_cvtsi32_si128(x) (vreinterpretq_s8_s32((int32x4_t) { (int32_t) (x), 0, 0, 0 }))
 #define _mm_cvtsi64_si128(x) (vreinterpretq_s8_s64((int64x2_t) { (int64_t) (x), 0 }))
 #define _mm_cvtsi128_si32(x) (vgetq_lane_s32(vreinterpretq_s32_s8(x), 0))
 #define _mm_cvtsi128_si64(x) (vgetq_lane_s64(vreinterpretq_s64_s8(x), 0))


 /*****************************************************************************
  * Others                                                                    *
  *****************************************************************************/

 #define _mm_packs_epi16(a, b) (vcombine_s8(vqmovn_s16(vreinterpretq_s16_s8(a)), vqmovn_s16(vreinterpretq_s16_s8(b))))
 #define _mm_packs_epi32(a, b) \
	 (vreinterpretq_s8_s16(vcombine_s16(vqmovn_s32(vreinterpretq_s32_s8(a)), vqmovn_s32(vreinterpretq_s32_s8(b)))))
 #define _mm_packus_epi16(a, b) \
	 (vreinterpretq_s8_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_s8(a)), vqmovun_s16(vreinterpretq_s16_s8(b)))))

 #define _mm_extract_epi16(x, imm) (vgetq_lane_s16(vreinterpretq_s16_s8(x), (imm)))
 #define _mm_insert_epi16(x, val, imm) (vreinterpretq_s8_s16(vsetq_lane_s16((int16_t) (val), vreinterpretq_s16_s8(x), (imm))))

 static XSSE_FORCE_INLINE int _mm_movemask_epi8(__m128i x)
 {
	 /**
	  * based on code from
	  * https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
	  */
	 uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(vreinterpretq_u8_s8(x), 7));
	 uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7));
	 uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
	 uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
	 return vgetq_lane_u8(paired64, 0) | ((int) vgetq_lane_u8(paired64, 8) << 8);
 }

 #define _MM_SHUFFLE(a, b, c, d) (((a) << 6) | ((b) << 4) | ((c) << 2) | (d))
 #ifdef XSSE_HAS_MACRO_EXTENSION
 #define _mm_shuffle_epi32(x, imm) __extension__({ \
		 int32x4_t __xsse_tmp = vreinterpretq_s32_s8(x); \
		 vreinterpretq_s8_s32((int32x4_t) { \
			 (int32_t) vgetq_lane_s32(__xsse_tmp, ((imm) >> 0) & 0x3), \
			 (int32_t) vgetq_lane_s32(__xsse_tmp, ((imm) >> 2) & 0x3), \
			 (int32_t) vgetq_lane_s32(__xsse_tmp, ((imm) >> 4) & 0x3), \
			 (int32_t) vgetq_lane_s32(__xsse_tmp, ((imm) >> 6) & 0x3) \
		 }); \
	 })
 #define _mm_shufflehi_epi16(x, imm) __extension__({ \
		 int16x8_t __xsse_tmp = vreinterpretq_s16_s8(x); \
		 vreinterpretq_s8_s16(vcombine_s16( \
			 vget_low_s16(__xsse_tmp), \
			 (int16x4_t) { \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 0) & 0x3) + 4), \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 2) & 0x3) + 4), \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 4) & 0x3) + 4), \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 6) & 0x3) + 4) \
			 } \
		 )); \
	 })
 #define _mm_shufflelo_epi16(x, imm) __extension__({ \
		 int16x8_t __xsse_tmp = vreinterpretq_s16_s8(x); \
		 vreinterpretq_s8_s16(vcombine_s16( \
			 (int16x4_t) { \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 0) & 0x3)), \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 2) & 0x3)), \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 4) & 0x3)), \
				 (int16_t) vgetq_lane_s16(__xsse_tmp, (((imm) >> 6) & 0x3)) \
			 }, \
			 vget_high_s16(__xsse_tmp) \
		 )); \
	 })
 #else
 static XSSE_FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i x, int imm)
 {
	 int32x4_t vec = vreinterpretq_s32_s8(x);
	 int32_t arr[4];
	 vst1q_s32(arr, vec);

	 return vreinterpretq_s8_s32((int32x4_t) {
		 arr[(imm >> 0) & 0x3],
		 arr[(imm >> 2) & 0x3],
		 arr[(imm >> 4) & 0x3],
		 arr[(imm >> 6) & 0x3]
	 });
 }
 static XSSE_FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i x, int imm)
 {
	 int16x8_t vec = vreinterpretq_s16_s8(x);
	 int16_t arr[8];
	 vst1q_s16(arr, vec);

	 return vreinterpretq_s8_s16((int16x8_t) {
		 arr[0], arr[1], arr[2], arr[3],
		 arr[((imm >> 0) & 0x3) + 4],
		 arr[((imm >> 2) & 0x3) + 4],
		 arr[((imm >> 4) & 0x3) + 4],
		 arr[((imm >> 6) & 0x3) + 4]
	 });
 }
 static XSSE_FORCE_INLINE __m128i _mm_shufflelo_epi16(__m128i x, int imm)
 {
	 int16x8_t vec = vreinterpretq_s16_s8(x);
	 int16_t arr[8];
	 vst1q_s16(arr, vec);

	 return vreinterpretq_s8_s16((int16x8_t) {
		 arr[((imm >> 0) & 0x3)],
		 arr[((imm >> 2) & 0x3)],
		 arr[((imm >> 4) & 0x3)],
		 arr[((imm >> 6) & 0x3)],
		 arr[4], arr[5], arr[6], arr[7]
	 });
 }
 #endif

 #define _mm_unpackhi_epi8(a, b) (vzip2q_s8((a), (b)))
 #define _mm_unpackhi_epi16(a, b) (vreinterpretq_s8_s16(vzip2q_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_unpackhi_epi32(a, b) (vreinterpretq_s8_s32(vzip2q_s32(vreinterpretq_s32_s8(a), vreinterpretq_s32_s8(b))))
 #define _mm_unpackhi_epi64(a, b) (vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(a), vreinterpretq_s64_s8(b))))

 #define _mm_unpacklo_epi8(a, b) (vzip1q_s8((a), (b)))
 #define _mm_unpacklo_epi16(a, b) (vreinterpretq_s8_s16(vzip1q_s16(vreinterpretq_s16_s8(a), vreinterpretq_s16_s8(b))))
 #define _mm_unpacklo_epi32(a, b) (vreinterpretq_s8_s32(vzip1q_s32(vreinterpretq_s32_s8(a), vreinterpretq_s32_s8(b))))
 #define _mm_unpacklo_epi64(a, b) (vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(a), vreinterpretq_s64_s8(b))))

 #define _mm_move_epi64(x) (vreinterpretq_s8_s64((int64x2_t) { vgetq_lane_s64(vreinterpretq_s64_s8(x), 0), 0 }))

 #endif

 #endif /* XSSE_H */

Youez - 2016 - github.com/yon3zu
LinuXploit