better code
TRANSCRIPT
-
8/9/2019 Better Code
1/32
Writing better code with help from the compiler
Thiago Macieira — Qt Developer Days & LinuxCon Europe 2014
-
8/9/2019 Better Code
2/32
Who am I?
-
8/9/2019 Better Code
3/32
Example scenario
Interview question
You have 2 MB of data and you want to calculate how many bits are set. How would you do it? Memory usage is not a constraint (within reason).
-
8/9/2019 Better Code
4/32
static unsigned char data[2*1024*1024];
int bitcount() {
    int result = 0;
for(inti=0;i
-
8/9/2019 Better Code
5/32
Approach 2: use a lookup table
staticunsignedchardata[2*1024*1024];e!ternconstushortbitcounttable[%&%];
intbitcount(){ intresult=0; for(inti=0;i
-
8/9/2019 Better Code
6/32
My answer
# Use the POPCNT instruction
$ Added with the first Intel Core-i7 generation, Nehalem (SSE4.2, but separate CPUID bit)
-
8/9/2019 Better Code
7/32
How do you use the POPCNT instruction?
# Write assembly
# Use the GCC intrinsic: __builtin_popcount()
# Use the Intel intrinsic: _mm_popcnt_u32()
-
8/9/2019 Better Code
8/32
When can I use the instruction?
# Use unconditionally
# Check CPUID
# Ask the linker for help
# Check if surrounding code already requires a CPU that supports the instruction anyway
-
8/9/2019 Better Code
9/32
Choosing the solution
# What affects the choice:
$ CPUs it will run on
$ Compilers & toolchains it will be compiled with
$ Libraries you're using
-
8/9/2019 Better Code
10/32
Other architectures
# Intrinsics exist for ARM and PowerPC too (Neon and Altivec)
# Not all compiler features work on those architectures yet
# But not discussed on this presentation
-
8/9/2019 Better Code
11/32
Using intrinsics
-
8/9/2019 Better Code
12/32
Finding out which intrinsic to use
# Use the SDM, Luke
-
8/9/2019 Better Code
13/32
Examples using intrinsics
# The population count # Calculating CRC32
static unsigned char data[2*1024*1024];
intbitcount(){ intresult=0; for(inti=0;i
-
8/9/2019 Better Code
14/32
Where are intrinsics allowed?
For all compilers: recent enough (e.g., GCC 4.7 for AVX2, 4.9 for AVX512)
Compiler Permitted usage
Microsoft Visual Studio, Intel C++ Compiler: Anywhere, no special build options required
Clang, GCC 4.8 or earlier: Anywhere, as long as code generation is enabled
(e.g., -mavx, -mavx2, -march=core-avx-i, etc. active)
GCC 4.9: Code generation enabled; or
functions decorated with __attribute__((target(...)))
(etc.)
-
8/9/2019 Better Code
15/32
How I solved this for Qt 5.4
# *acro $or tesn& with 4 u"2 u1> u? goes !else
staticuintcrc"2(@@@){ +693:-7:A3(); return0;!endif
-
8/9/2019 Better Code
16/32
Runtime dispatching
-
8/9/2019 Better Code
17/32
Runtime dispatching basics
1) Detect CPU
2) Determine best implementation
3) Run it
With GCC 4.8:
(doesn't work with Clang, ICC or MSVC)
&oidfunctionsse2();&oidfunctionlain();
&oidfunction(){
if(builtincusuor functionsse2(); else functionlain();!
&oidfunctionsse2();&oidfunctionlain();&oidfunction()
{ if(*-6suorts553 functionsse2(); else functionlain();!
-
8/9/2019 Better Code
18/32
Identifying the CPU
# Running CPUID left as an exercise to the reader
# Just remember: cache the result
extern int qt_cpu_features;
extern void qDetectCpuFeatures(void);
static inline int qCpuFeatures()
{
    int features = qt_cpu_features;
    if (Q_UNLIKELY(features == 0)) {
        qDetectCpuFeatures();
        features = qt_cpu_features;
    }
    return features;
}
-
8/9/2019 Better Code
19/32
Checking surrounding code
-
8/9/2019 Better Code
20/32
Putting it together
# Result on 64-bit: unconditional call to the SSE2 version
void function_sse2();
void function_plain();
void function()
{
    if (qCpuHasFeature(SSE2))
        function_sse2();
    else
        function_plain();
}
-
8/9/2019 Better Code
21/32
void *memcpy(void *, const void *, size_t)
    __attribute__((ifunc("resolve_memcpy")));
decltype(memcpy) memcpy_avx, memcpy_sse2;
auto resolve_memcpy()
{
    return qCpuHasFeature(AVX) ? memcpy_avx : memcpy_sse2;
}
void *memcpy(void *, const void *, size_t)
    __attribute__((ifunc("resolve_memcpy")));
void *memcpy_avx(void *, const void *, size_t);
void *memcpy_sse2(void *, const void *, size_t);
static void *(*resolve_memcpy(void))(void *, const void *, size_t)
{
    return qCpuHasFeature(AVX) ? memcpy_avx : memcpy_sse2;
}
Asking the linker and dynamic linker for help
# Requires:
$ :libc 2.88.8, Binu+ls 2.29.8, : . 5 ' 8.9
$ Not supported with Clang or on Android (due to Bionic)
-
8/9/2019 Better Code
22/32
GCC 4.8 auto-dispatcher a.k.a. "Function Multi Versioning"
# C++ only
__attribute__((target("popcnt")))
int bitcount()
{
intresult=0; for(inti=0;i
-
8/9/2019 Better Code
23/32
Finding better answers to interview questions
# How would you write a function that returns a 32-bit random number?
# How would you zero-extend a block of data from 8- to 16-bit?
# How do you calculate the next power of 2 for a given non-zero integer?
uint32 nextPowerOfTwo(uint32 v)
{
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;
    return v;
}
-
8/9/2019 Better Code
24/32
Better answer
uint32 nextPowerOfTwo_x86(uint32 v)
{
    int idx = _bit_scan_reverse(v);
return26
-
8/9/2019 Better Code
25/32
Summary
# Learn from the SDM: use intrinsics
# Check the CPU at compile time, run time and dispatch
# Use library, compiler and linker tools
-
8/9/2019 Better Code
26/32
Zero-extending from 8 to 16
-
8/9/2019 Better Code
27/32
Left to the whims of the compiler (-O3)
-.. 4/$ 2d$c ,odu (3rsi3ra!1)3!,,1 2d51 add 60!13r10
2d5 ,oda 3!,,13!,,& 2d55 +un+c7hb8 3!,,03!,,1 2d5d +un+c7lb8 3!,,03!,,& 2da1 ,odu 3!,,10!10(3rdi3ra!2) 2da9 ,odu 3!,,&(3rdi3ra!2) 2dac add 60!103ra! 2db0 c,+ 3r53r10 2db& :b 2d$c
.lang &/4 210 ,o (3rsi 21 +un+c7lb8 3!,,0
215 ,o 0!$(3r 21f +un+c7lb8 3!,,0 21%& +and 3!,,0 21%9 +and 3!,,0 21%b ,odu 3!,,1 2190 ,odu 3!,,2 219% add 60!10 219a c,+ 3rc!3 219d :ne 210
.. 14 9d& ,o (3r$3rsi1)3!,,1 9d5 +un+c7lb8 3!,,03!,,1 9dd ,oda 3!,,1(3rdi3r$2) 9e& add 60!$3r$ 9e9 c,+ 3ra!3r$ 9ea :b 9d&
-
8/9/2019 Better Code
28/32
Left to the whims of the compiler (-O3 -mavx2)
-.. 4/5 2bb% ,odu (3rsi3rdi1)3,,0 2bbb add 60!13r11
2bbf +,oz!b8 3!,,03,,1 2bc4 e!tracti12$ 60!13,,03!,,0 2bca +,oz!b8 3!,,03,,0 2bcf ,oda 3,,1(3rb!3rdi2) 2bd4 ,oda 3,,00!20(3rb!3rdi2) 2bda add 60!203rdi 2bde c,+ 3r113ra! 2be1 :a 2bb%
.lang &/4 21a0 ,odu 0!2 21a ,odu 0!1
21aa ,odu (3rs 21ae +,oz!b8 3!,, 21b& +,oz!b8 3!,, 21b$ +,oz!b8 3!,, 21bd ,odu 3,, 21c2 ,odu 3,, 21c9 ,odu 3,, 21cb add 60!% 21cf add 60!&
21d& add 60!ffff 21d9 c,+ 3rc! 21da :ne 21a0
.. 14 9dc +,oz!b8 (3r$3rsi1)3,,0 9e2 ,odu 3,,0(3rdi3r$2) 9e$ add 60!103r$ 9ec c,+ 3ra!3r$ 9ef :b 9dc
-
8/9/2019 Better Code
29/32
Helping out the compiler
# GCC's implementation was the best with SSE2
$ ICC produces better code for AVX2
-
8/9/2019 Better Code
30/32
Code generated with the intrinsics
efore 2d$c ,odu (3rsi3ra!1)3!,,1 2d51 add 60!13r10
2d5 ,oda 3!,,13!,,& 2d55 +un+c7hb8 3!,,03!,,1 2d5d +un+c7lb8 3!,,03!,,& 2da1 ,odu 3!,,10!10(3rdi3ra!2) 2da9 ,odu 3!,,&(3rdi3ra!2) 2dac add 60!103ra! 2db0 c,+ 3r53r10 2db& :b 2d$c
>fter 2d90 ,odu (3rsi 2d9 add 60!10
2d95 add 3rc! 2d9c c,+ 3r$3 2d9f ,oda 3!,,0 2d$& +un+c7hb8 3!,,1 2d$9 +un+c7lb8 3!,,1 2d$b ,odu 3!,,0 2d51 ,odu 3!,,2 2d5% ,o 3ra! 2d55 :ne 2d90
Better or worse?
-
8/9/2019 Better Code
31/32
Extending to AVX2 support
const char *e = str + size;
qptrdiff offset = 0;
// we're going to read str[offset..offset+15] (16 bytes)
for ( ; str + offset + 15 < e; offset += 16) {
    const __m128i chunk = _mm_loadu_si128((__m128i*)(str + offset)); // load 16 bytes
#ifdef __AVX2__
    // zero extend to an YMM register
    const __m256i extended = _mm256_cvtepu8_epi16(chunk);
    // store 32 bytes
    _mm256_storeu_si256((__m256i*)(dst + offset), extended);
#else
    const __m128i nullMask = _mm_set1_epi32(0);
    // unpack the first 8 bytes, padding with zeros
    const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
    _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store 16 bytes
    // unpack the last 8 bytes, padding with zeros
    const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
    _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store next 16 bytes
#endif
}
-
8/9/2019 Better Code
32/32
Thiago Macieira thiago.macieira