cpuid_amd64.go 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194
  1. // Generated, DO NOT EDIT,
  2. // but copy it to your own project and rename the package.
  3. // See more at http://github.com/klauspost/cpuid
  4. // +build !appengine
  5. // +build gc
  6. // +build !noasm
  7. package s2
  8. import "strings"
  9. func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
  10. func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
  11. func asmXgetbv(index uint32) (eax, edx uint32)
  12. func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
  13. func initCPU() {
  14. cpuid = asmCpuid
  15. cpuidex = asmCpuidex
  16. xgetbv = asmXgetbv
  17. rdtscpAsm = asmRdtscpAsm
  18. }
  19. // Vendor is a representation of a CPU vendor.
  20. type vendor int
  21. const (
  22. other vendor = iota
  23. intel
  24. amd
  25. via
  26. transmeta
  27. nsc
  28. kvm // Kernel-based Virtual Machine
  29. msvm // Microsoft Hyper-V or Windows Virtual PC
  30. vmware
  31. xenhvm
  32. bhyve
  33. hygon
  34. )
  35. const (
  36. cmov = 1 << iota // i686 CMOV
  37. nx // NX (No-Execute) bit
  38. amd3dnow // AMD 3DNOW
  39. amd3dnowext // AMD 3DNowExt
  40. mmx // standard MMX
  41. mmxext // SSE integer functions or AMD MMX ext
  42. sse // SSE functions
  43. sse2 // P4 SSE functions
  44. sse3 // Prescott SSE3 functions
  45. ssse3 // Conroe SSSE3 functions
  46. sse4 // Penryn SSE4.1 functions
  47. sse4a // AMD Barcelona microarchitecture SSE4a instructions
  48. sse42 // Nehalem SSE4.2 functions
  49. avx // AVX functions
  50. avx2 // AVX2 functions
  51. fma3 // Intel FMA 3
  52. fma4 // Bulldozer FMA4 functions
  53. xop // Bulldozer XOP functions
  54. f16c // Half-precision floating-point conversion
  55. bmi1 // Bit Manipulation Instruction Set 1
  56. bmi2 // Bit Manipulation Instruction Set 2
  57. tbm // AMD Trailing Bit Manipulation
  58. lzcnt // LZCNT instruction
  59. popcnt // POPCNT instruction
  60. aesni // Advanced Encryption Standard New Instructions
  61. clmul // Carry-less Multiplication
  62. htt // Hyperthreading (enabled)
  63. hle // Hardware Lock Elision
  64. rtm // Restricted Transactional Memory
  65. rdrand // RDRAND instruction is available
  66. rdseed // RDSEED instruction is available
  67. adx // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  68. sha // Intel SHA Extensions
  69. avx512f // AVX-512 Foundation
  70. avx512dq // AVX-512 Doubleword and Quadword Instructions
  71. avx512ifma // AVX-512 Integer Fused Multiply-Add Instructions
  72. avx512pf // AVX-512 Prefetch Instructions
  73. avx512er // AVX-512 Exponential and Reciprocal Instructions
  74. avx512cd // AVX-512 Conflict Detection Instructions
  75. avx512bw // AVX-512 Byte and Word Instructions
  76. avx512vl // AVX-512 Vector Length Extensions
  77. avx512vbmi // AVX-512 Vector Bit Manipulation Instructions
  78. avx512vbmi2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
  79. avx512vnni // AVX-512 Vector Neural Network Instructions
  80. avx512vpopcntdq // AVX-512 Vector Population Count Doubleword and Quadword
  81. gfni // Galois Field New Instructions
  82. vaes // Vector AES
  83. avx512bitalg // AVX-512 Bit Algorithms
  84. vpclmulqdq // Carry-Less Multiplication Quadword
  85. avx512bf16 // AVX-512 BFLOAT16 Instructions
  86. avx512vp2intersect // AVX-512 Intersect for D/Q
  87. mpx // Intel MPX (Memory Protection Extensions)
  88. erms // Enhanced REP MOVSB/STOSB
  89. rdtscp // RDTSCP Instruction
  90. cx16 // CMPXCHG16B Instruction
  91. sgx // Software Guard Extensions
  92. sgxlc // Software Guard Extensions Launch Control
  93. ibpb // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
  94. stibp // Single Thread Indirect Branch Predictors
  95. vmx // Virtual Machine Extensions
  96. // Performance indicators
  97. sse2slow // SSE2 is supported, but usually not faster
  98. sse3slow // SSE3 is supported, but usually not faster
  99. atom // Atom processor, some SSSE3 instructions are slower
  100. )
  101. var flagNames = map[flags]string{
  102. cmov: "CMOV", // i686 CMOV
  103. nx: "NX", // NX (No-Execute) bit
  104. amd3dnow: "AMD3DNOW", // AMD 3DNOW
  105. amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt
  106. mmx: "MMX", // Standard MMX
  107. mmxext: "MMXEXT", // SSE integer functions or AMD MMX ext
  108. sse: "SSE", // SSE functions
  109. sse2: "SSE2", // P4 SSE2 functions
  110. sse3: "SSE3", // Prescott SSE3 functions
  111. ssse3: "SSSE3", // Conroe SSSE3 functions
  112. sse4: "SSE4.1", // Penryn SSE4.1 functions
  113. sse4a: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
  114. sse42: "SSE4.2", // Nehalem SSE4.2 functions
  115. avx: "AVX", // AVX functions
  116. avx2: "AVX2", // AVX functions
  117. fma3: "FMA3", // Intel FMA 3
  118. fma4: "FMA4", // Bulldozer FMA4 functions
  119. xop: "XOP", // Bulldozer XOP functions
  120. f16c: "F16C", // Half-precision floating-point conversion
  121. bmi1: "BMI1", // Bit Manipulation Instruction Set 1
  122. bmi2: "BMI2", // Bit Manipulation Instruction Set 2
  123. tbm: "TBM", // AMD Trailing Bit Manipulation
  124. lzcnt: "LZCNT", // LZCNT instruction
  125. popcnt: "POPCNT", // POPCNT instruction
  126. aesni: "AESNI", // Advanced Encryption Standard New Instructions
  127. clmul: "CLMUL", // Carry-less Multiplication
  128. htt: "HTT", // Hyperthreading (enabled)
  129. hle: "HLE", // Hardware Lock Elision
  130. rtm: "RTM", // Restricted Transactional Memory
  131. rdrand: "RDRAND", // RDRAND instruction is available
  132. rdseed: "RDSEED", // RDSEED instruction is available
  133. adx: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  134. sha: "SHA", // Intel SHA Extensions
  135. avx512f: "AVX512F", // AVX-512 Foundation
  136. avx512dq: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
  137. avx512ifma: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
  138. avx512pf: "AVX512PF", // AVX-512 Prefetch Instructions
  139. avx512er: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
  140. avx512cd: "AVX512CD", // AVX-512 Conflict Detection Instructions
  141. avx512bw: "AVX512BW", // AVX-512 Byte and Word Instructions
  142. avx512vl: "AVX512VL", // AVX-512 Vector Length Extensions
  143. avx512vbmi: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
  144. avx512vbmi2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2
  145. avx512vnni: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions
  146. avx512vpopcntdq: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword
  147. gfni: "GFNI", // Galois Field New Instructions
  148. vaes: "VAES", // Vector AES
  149. avx512bitalg: "AVX512BITALG", // AVX-512 Bit Algorithms
  150. vpclmulqdq: "VPCLMULQDQ", // Carry-Less Multiplication Quadword
  151. avx512bf16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction
  152. avx512vp2intersect: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
  153. mpx: "MPX", // Intel MPX (Memory Protection Extensions)
  154. erms: "ERMS", // Enhanced REP MOVSB/STOSB
  155. rdtscp: "RDTSCP", // RDTSCP Instruction
  156. cx16: "CX16", // CMPXCHG16B Instruction
  157. sgx: "SGX", // Software Guard Extensions
  158. sgxlc: "SGXLC", // Software Guard Extensions Launch Control
  159. ibpb: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
  160. stibp: "STIBP", // Single Thread Indirect Branch Predictors
  161. vmx: "VMX", // Virtual Machine Extensions
  162. // Performance indicators
  163. sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
  164. sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
  165. atom: "ATOM", // Atom processor, some SSSE3 instructions are slower
  166. }
  167. // CPUInfo contains information about the detected system CPU.
  168. type cpuInfo struct {
  169. brandname string // Brand name reported by the CPU
  170. vendorid vendor // Comparable CPU vendor ID
  171. features flags // Features of the CPU
  172. physicalcores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
  173. threadspercore int // Number of threads per physical core. Will be 1 if undetectable.
  174. logicalcores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
  175. family int // CPU family number
  176. model int // CPU model number
  177. cacheline int // Cache line size in bytes. Will be 0 if undetectable.
  178. cache struct {
  179. l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
  180. l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
  181. l2 int // L2 Cache (per core or shared). Will be -1 if undetected
  182. l3 int // L3 Instruction Cache (per core or shared). Will be -1 if undetected
  183. }
  184. sgx sgxsupport
  185. maxFunc uint32
  186. maxExFunc uint32
  187. }
  188. var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
  189. var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
  190. var xgetbv func(index uint32) (eax, edx uint32)
  191. var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
  192. // CPU contains information about the CPU as detected on startup,
  193. // or when Detect last was called.
  194. //
  195. // Use this as the primary entry point to you data,
  196. // this way queries are
  197. var cpu cpuInfo
  198. func init() {
  199. initCPU()
  200. detect()
  201. }
  202. // Detect will re-detect current CPU info.
  203. // This will replace the content of the exported CPU variable.
  204. //
  205. // Unless you expect the CPU to change while you are running your program
  206. // you should not need to call this function.
  207. // If you call this, you must ensure that no other goroutine is accessing the
  208. // exported CPU variable.
  209. func detect() {
  210. cpu.maxFunc = maxFunctionID()
  211. cpu.maxExFunc = maxExtendedFunction()
  212. cpu.brandname = brandName()
  213. cpu.cacheline = cacheLine()
  214. cpu.family, cpu.model = familyModel()
  215. cpu.features = support()
  216. cpu.sgx = hasSGX(cpu.features&sgx != 0, cpu.features&sgxlc != 0)
  217. cpu.threadspercore = threadsPerCore()
  218. cpu.logicalcores = logicalCores()
  219. cpu.physicalcores = physicalCores()
  220. cpu.vendorid = vendorID()
  221. cpu.cacheSize()
  222. }
  223. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  224. // Cmov indicates support of CMOV instructions
  225. func (c cpuInfo) cmov() bool {
  226. return c.features&cmov != 0
  227. }
  228. // Amd3dnow indicates support of AMD 3DNOW! instructions
  229. func (c cpuInfo) amd3dnow() bool {
  230. return c.features&amd3dnow != 0
  231. }
  232. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  233. func (c cpuInfo) amd3dnowext() bool {
  234. return c.features&amd3dnowext != 0
  235. }
  236. // VMX indicates support of VMX
  237. func (c cpuInfo) vmx() bool {
  238. return c.features&vmx != 0
  239. }
  240. // MMX indicates support of MMX instructions
  241. func (c cpuInfo) mmx() bool {
  242. return c.features&mmx != 0
  243. }
  244. // MMXExt indicates support of MMXEXT instructions
  245. // (SSE integer functions or AMD MMX ext)
  246. func (c cpuInfo) mmxext() bool {
  247. return c.features&mmxext != 0
  248. }
  249. // SSE indicates support of SSE instructions
  250. func (c cpuInfo) sse() bool {
  251. return c.features&sse != 0
  252. }
  253. // SSE2 indicates support of SSE 2 instructions
  254. func (c cpuInfo) sse2() bool {
  255. return c.features&sse2 != 0
  256. }
  257. // SSE3 indicates support of SSE 3 instructions
  258. func (c cpuInfo) sse3() bool {
  259. return c.features&sse3 != 0
  260. }
  261. // SSSE3 indicates support of SSSE 3 instructions
  262. func (c cpuInfo) ssse3() bool {
  263. return c.features&ssse3 != 0
  264. }
  265. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  266. func (c cpuInfo) sse4() bool {
  267. return c.features&sse4 != 0
  268. }
  269. // SSE42 indicates support of SSE4.2 instructions
  270. func (c cpuInfo) sse42() bool {
  271. return c.features&sse42 != 0
  272. }
  273. // AVX indicates support of AVX instructions
  274. // and operating system support of AVX instructions
  275. func (c cpuInfo) avx() bool {
  276. return c.features&avx != 0
  277. }
  278. // AVX2 indicates support of AVX2 instructions
  279. func (c cpuInfo) avx2() bool {
  280. return c.features&avx2 != 0
  281. }
  282. // FMA3 indicates support of FMA3 instructions
  283. func (c cpuInfo) fma3() bool {
  284. return c.features&fma3 != 0
  285. }
  286. // FMA4 indicates support of FMA4 instructions
  287. func (c cpuInfo) fma4() bool {
  288. return c.features&fma4 != 0
  289. }
  290. // XOP indicates support of XOP instructions
  291. func (c cpuInfo) xop() bool {
  292. return c.features&xop != 0
  293. }
  294. // F16C indicates support of F16C instructions
  295. func (c cpuInfo) f16c() bool {
  296. return c.features&f16c != 0
  297. }
  298. // BMI1 indicates support of BMI1 instructions
  299. func (c cpuInfo) bmi1() bool {
  300. return c.features&bmi1 != 0
  301. }
  302. // BMI2 indicates support of BMI2 instructions
  303. func (c cpuInfo) bmi2() bool {
  304. return c.features&bmi2 != 0
  305. }
  306. // TBM indicates support of TBM instructions
  307. // (AMD Trailing Bit Manipulation)
  308. func (c cpuInfo) tbm() bool {
  309. return c.features&tbm != 0
  310. }
  311. // Lzcnt indicates support of LZCNT instruction
  312. func (c cpuInfo) lzcnt() bool {
  313. return c.features&lzcnt != 0
  314. }
  315. // Popcnt indicates support of POPCNT instruction
  316. func (c cpuInfo) popcnt() bool {
  317. return c.features&popcnt != 0
  318. }
  319. // HTT indicates the processor has Hyperthreading enabled
  320. func (c cpuInfo) htt() bool {
  321. return c.features&htt != 0
  322. }
  323. // SSE2Slow indicates that SSE2 may be slow on this processor
  324. func (c cpuInfo) sse2slow() bool {
  325. return c.features&sse2slow != 0
  326. }
  327. // SSE3Slow indicates that SSE3 may be slow on this processor
  328. func (c cpuInfo) sse3slow() bool {
  329. return c.features&sse3slow != 0
  330. }
  331. // AesNi indicates support of AES-NI instructions
  332. // (Advanced Encryption Standard New Instructions)
  333. func (c cpuInfo) aesni() bool {
  334. return c.features&aesni != 0
  335. }
  336. // Clmul indicates support of CLMUL instructions
  337. // (Carry-less Multiplication)
  338. func (c cpuInfo) clmul() bool {
  339. return c.features&clmul != 0
  340. }
  341. // NX indicates support of NX (No-Execute) bit
  342. func (c cpuInfo) nx() bool {
  343. return c.features&nx != 0
  344. }
  345. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  346. func (c cpuInfo) sse4a() bool {
  347. return c.features&sse4a != 0
  348. }
  349. // HLE indicates support of Hardware Lock Elision
  350. func (c cpuInfo) hle() bool {
  351. return c.features&hle != 0
  352. }
  353. // RTM indicates support of Restricted Transactional Memory
  354. func (c cpuInfo) rtm() bool {
  355. return c.features&rtm != 0
  356. }
  357. // Rdrand indicates support of RDRAND instruction is available
  358. func (c cpuInfo) rdrand() bool {
  359. return c.features&rdrand != 0
  360. }
  361. // Rdseed indicates support of RDSEED instruction is available
  362. func (c cpuInfo) rdseed() bool {
  363. return c.features&rdseed != 0
  364. }
  365. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  366. func (c cpuInfo) adx() bool {
  367. return c.features&adx != 0
  368. }
  369. // SHA indicates support of Intel SHA Extensions
  370. func (c cpuInfo) sha() bool {
  371. return c.features&sha != 0
  372. }
  373. // AVX512F indicates support of AVX-512 Foundation
  374. func (c cpuInfo) avx512f() bool {
  375. return c.features&avx512f != 0
  376. }
  377. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  378. func (c cpuInfo) avx512dq() bool {
  379. return c.features&avx512dq != 0
  380. }
  381. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  382. func (c cpuInfo) avx512ifma() bool {
  383. return c.features&avx512ifma != 0
  384. }
  385. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  386. func (c cpuInfo) avx512pf() bool {
  387. return c.features&avx512pf != 0
  388. }
  389. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  390. func (c cpuInfo) avx512er() bool {
  391. return c.features&avx512er != 0
  392. }
  393. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  394. func (c cpuInfo) avx512cd() bool {
  395. return c.features&avx512cd != 0
  396. }
  397. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  398. func (c cpuInfo) avx512bw() bool {
  399. return c.features&avx512bw != 0
  400. }
  401. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  402. func (c cpuInfo) avx512vl() bool {
  403. return c.features&avx512vl != 0
  404. }
  405. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  406. func (c cpuInfo) avx512vbmi() bool {
  407. return c.features&avx512vbmi != 0
  408. }
  409. // AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
  410. func (c cpuInfo) avx512vbmi2() bool {
  411. return c.features&avx512vbmi2 != 0
  412. }
  413. // AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
  414. func (c cpuInfo) avx512vnni() bool {
  415. return c.features&avx512vnni != 0
  416. }
  417. // AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
  418. func (c cpuInfo) avx512vpopcntdq() bool {
  419. return c.features&avx512vpopcntdq != 0
  420. }
  421. // GFNI indicates support of Galois Field New Instructions
  422. func (c cpuInfo) gfni() bool {
  423. return c.features&gfni != 0
  424. }
  425. // VAES indicates support of Vector AES
  426. func (c cpuInfo) vaes() bool {
  427. return c.features&vaes != 0
  428. }
  429. // AVX512BITALG indicates support of AVX-512 Bit Algorithms
  430. func (c cpuInfo) avx512bitalg() bool {
  431. return c.features&avx512bitalg != 0
  432. }
  433. // VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
  434. func (c cpuInfo) vpclmulqdq() bool {
  435. return c.features&vpclmulqdq != 0
  436. }
  437. // AVX512BF16 indicates support of
  438. func (c cpuInfo) avx512bf16() bool {
  439. return c.features&avx512bf16 != 0
  440. }
  441. // AVX512VP2INTERSECT indicates support of
  442. func (c cpuInfo) avx512vp2intersect() bool {
  443. return c.features&avx512vp2intersect != 0
  444. }
  445. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  446. func (c cpuInfo) mpx() bool {
  447. return c.features&mpx != 0
  448. }
  449. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  450. func (c cpuInfo) erms() bool {
  451. return c.features&erms != 0
  452. }
  453. // RDTSCP Instruction is available.
  454. func (c cpuInfo) rdtscp() bool {
  455. return c.features&rdtscp != 0
  456. }
  457. // CX16 indicates if CMPXCHG16B instruction is available.
  458. func (c cpuInfo) cx16() bool {
  459. return c.features&cx16 != 0
  460. }
  461. // TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
  462. // So TSX simply checks that.
  463. func (c cpuInfo) tsx() bool {
  464. return c.features&(hle|rtm) == hle|rtm
  465. }
  466. // Atom indicates an Atom processor
  467. func (c cpuInfo) atom() bool {
  468. return c.features&atom != 0
  469. }
  470. // Intel returns true if vendor is recognized as Intel
  471. func (c cpuInfo) intel() bool {
  472. return c.vendorid == intel
  473. }
  474. // AMD returns true if vendor is recognized as AMD
  475. func (c cpuInfo) amd() bool {
  476. return c.vendorid == amd
  477. }
  478. // Hygon returns true if vendor is recognized as Hygon
  479. func (c cpuInfo) hygon() bool {
  480. return c.vendorid == hygon
  481. }
  482. // Transmeta returns true if vendor is recognized as Transmeta
  483. func (c cpuInfo) transmeta() bool {
  484. return c.vendorid == transmeta
  485. }
  486. // NSC returns true if vendor is recognized as National Semiconductor
  487. func (c cpuInfo) nsc() bool {
  488. return c.vendorid == nsc
  489. }
  490. // VIA returns true if vendor is recognized as VIA
  491. func (c cpuInfo) via() bool {
  492. return c.vendorid == via
  493. }
  494. // RTCounter returns the 64-bit time-stamp counter
  495. // Uses the RDTSCP instruction. The value 0 is returned
  496. // if the CPU does not support the instruction.
  497. func (c cpuInfo) rtcounter() uint64 {
  498. if !c.rdtscp() {
  499. return 0
  500. }
  501. a, _, _, d := rdtscpAsm()
  502. return uint64(a) | (uint64(d) << 32)
  503. }
  504. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  505. // This variable is OS dependent, but on Linux contains information
  506. // about the current cpu/core the code is running on.
  507. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  508. func (c cpuInfo) ia32tscaux() uint32 {
  509. if !c.rdtscp() {
  510. return 0
  511. }
  512. _, _, ecx, _ := rdtscpAsm()
  513. return ecx
  514. }
  515. // LogicalCPU will return the Logical CPU the code is currently executing on.
  516. // This is likely to change when the OS re-schedules the running thread
  517. // to another CPU.
  518. // If the current core cannot be detected, -1 will be returned.
  519. func (c cpuInfo) logicalcpu() int {
  520. if c.maxFunc < 1 {
  521. return -1
  522. }
  523. _, ebx, _, _ := cpuid(1)
  524. return int(ebx >> 24)
  525. }
  526. // VM Will return true if the cpu id indicates we are in
  527. // a virtual machine. This is only a hint, and will very likely
  528. // have many false negatives.
  529. func (c cpuInfo) vm() bool {
  530. switch c.vendorid {
  531. case msvm, kvm, vmware, xenhvm, bhyve:
  532. return true
  533. }
  534. return false
  535. }
  536. // Flags contains detected cpu features and caracteristics
  537. type flags uint64
  538. // String returns a string representation of the detected
  539. // CPU features.
  540. func (f flags) String() string {
  541. return strings.Join(f.strings(), ",")
  542. }
  543. // Strings returns and array of the detected features.
  544. func (f flags) strings() []string {
  545. s := support()
  546. r := make([]string, 0, 20)
  547. for i := uint(0); i < 64; i++ {
  548. key := flags(1 << i)
  549. val := flagNames[key]
  550. if s&key != 0 {
  551. r = append(r, val)
  552. }
  553. }
  554. return r
  555. }
  556. func maxExtendedFunction() uint32 {
  557. eax, _, _, _ := cpuid(0x80000000)
  558. return eax
  559. }
  560. func maxFunctionID() uint32 {
  561. a, _, _, _ := cpuid(0)
  562. return a
  563. }
  564. func brandName() string {
  565. if maxExtendedFunction() >= 0x80000004 {
  566. v := make([]uint32, 0, 48)
  567. for i := uint32(0); i < 3; i++ {
  568. a, b, c, d := cpuid(0x80000002 + i)
  569. v = append(v, a, b, c, d)
  570. }
  571. return strings.Trim(string(valAsString(v...)), " ")
  572. }
  573. return "unknown"
  574. }
  575. func threadsPerCore() int {
  576. mfi := maxFunctionID()
  577. if mfi < 0x4 || vendorID() != intel {
  578. return 1
  579. }
  580. if mfi < 0xb {
  581. _, b, _, d := cpuid(1)
  582. if (d & (1 << 28)) != 0 {
  583. // v will contain logical core count
  584. v := (b >> 16) & 255
  585. if v > 1 {
  586. a4, _, _, _ := cpuid(4)
  587. // physical cores
  588. v2 := (a4 >> 26) + 1
  589. if v2 > 0 {
  590. return int(v) / int(v2)
  591. }
  592. }
  593. }
  594. return 1
  595. }
  596. _, b, _, _ := cpuidex(0xb, 0)
  597. if b&0xffff == 0 {
  598. return 1
  599. }
  600. return int(b & 0xffff)
  601. }
  602. func logicalCores() int {
  603. mfi := maxFunctionID()
  604. switch vendorID() {
  605. case intel:
  606. // Use this on old Intel processors
  607. if mfi < 0xb {
  608. if mfi < 1 {
  609. return 0
  610. }
  611. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  612. // that can be assigned to logical processors in a physical package.
  613. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  614. _, ebx, _, _ := cpuid(1)
  615. logical := (ebx >> 16) & 0xff
  616. return int(logical)
  617. }
  618. _, b, _, _ := cpuidex(0xb, 1)
  619. return int(b & 0xffff)
  620. case amd, hygon:
  621. _, b, _, _ := cpuid(1)
  622. return int((b >> 16) & 0xff)
  623. default:
  624. return 0
  625. }
  626. }
  627. func familyModel() (int, int) {
  628. if maxFunctionID() < 0x1 {
  629. return 0, 0
  630. }
  631. eax, _, _, _ := cpuid(1)
  632. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  633. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  634. return int(family), int(model)
  635. }
  636. func physicalCores() int {
  637. switch vendorID() {
  638. case intel:
  639. return logicalCores() / threadsPerCore()
  640. case amd, hygon:
  641. if maxExtendedFunction() >= 0x80000008 {
  642. _, _, c, _ := cpuid(0x80000008)
  643. return int(c&0xff) + 1
  644. }
  645. }
  646. return 0
  647. }
  648. // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
  649. var vendorMapping = map[string]vendor{
  650. "AMDisbetter!": amd,
  651. "AuthenticAMD": amd,
  652. "CentaurHauls": via,
  653. "GenuineIntel": intel,
  654. "TransmetaCPU": transmeta,
  655. "GenuineTMx86": transmeta,
  656. "Geode by NSC": nsc,
  657. "VIA VIA VIA ": via,
  658. "KVMKVMKVMKVM": kvm,
  659. "Microsoft Hv": msvm,
  660. "VMwareVMware": vmware,
  661. "XenVMMXenVMM": xenhvm,
  662. "bhyve bhyve ": bhyve,
  663. "HygonGenuine": hygon,
  664. }
  665. func vendorID() vendor {
  666. _, b, c, d := cpuid(0)
  667. v := valAsString(b, d, c)
  668. vend, ok := vendorMapping[string(v)]
  669. if !ok {
  670. return other
  671. }
  672. return vend
  673. }
  674. func cacheLine() int {
  675. if maxFunctionID() < 0x1 {
  676. return 0
  677. }
  678. _, ebx, _, _ := cpuid(1)
  679. cache := (ebx & 0xff00) >> 5 // cflush size
  680. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  681. _, _, ecx, _ := cpuid(0x80000006)
  682. cache = ecx & 0xff // cacheline size
  683. }
  684. // TODO: Read from Cache and TLB Information
  685. return int(cache)
  686. }
  687. func (c *cpuInfo) cacheSize() {
  688. c.cache.l1d = -1
  689. c.cache.l1i = -1
  690. c.cache.l2 = -1
  691. c.cache.l3 = -1
  692. vendor := vendorID()
  693. switch vendor {
  694. case intel:
  695. if maxFunctionID() < 4 {
  696. return
  697. }
  698. for i := uint32(0); ; i++ {
  699. eax, ebx, ecx, _ := cpuidex(4, i)
  700. cacheType := eax & 15
  701. if cacheType == 0 {
  702. break
  703. }
  704. cacheLevel := (eax >> 5) & 7
  705. coherency := int(ebx&0xfff) + 1
  706. partitions := int((ebx>>12)&0x3ff) + 1
  707. associativity := int((ebx>>22)&0x3ff) + 1
  708. sets := int(ecx) + 1
  709. size := associativity * partitions * coherency * sets
  710. switch cacheLevel {
  711. case 1:
  712. if cacheType == 1 {
  713. // 1 = Data Cache
  714. c.cache.l1d = size
  715. } else if cacheType == 2 {
  716. // 2 = Instruction Cache
  717. c.cache.l1i = size
  718. } else {
  719. if c.cache.l1d < 0 {
  720. c.cache.l1i = size
  721. }
  722. if c.cache.l1i < 0 {
  723. c.cache.l1i = size
  724. }
  725. }
  726. case 2:
  727. c.cache.l2 = size
  728. case 3:
  729. c.cache.l3 = size
  730. }
  731. }
  732. case amd, hygon:
  733. // Untested.
  734. if maxExtendedFunction() < 0x80000005 {
  735. return
  736. }
  737. _, _, ecx, edx := cpuid(0x80000005)
  738. c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
  739. c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
  740. if maxExtendedFunction() < 0x80000006 {
  741. return
  742. }
  743. _, _, ecx, _ = cpuid(0x80000006)
  744. c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  745. }
  746. return
  747. }
  748. type sgxepcsection struct {
  749. baseaddress uint64
  750. epcsize uint64
  751. }
  752. type sgxsupport struct {
  753. available bool
  754. launchcontrol bool
  755. sgx1supported bool
  756. sgx2supported bool
  757. maxenclavesizenot64 int64
  758. maxenclavesize64 int64
  759. epcsections []sgxepcsection
  760. }
  761. func hasSGX(available, lc bool) (rval sgxsupport) {
  762. rval.available = available
  763. if !available {
  764. return
  765. }
  766. rval.launchcontrol = lc
  767. a, _, _, d := cpuidex(0x12, 0)
  768. rval.sgx1supported = a&0x01 != 0
  769. rval.sgx2supported = a&0x02 != 0
  770. rval.maxenclavesizenot64 = 1 << (d & 0xFF) // pow 2
  771. rval.maxenclavesize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  772. rval.epcsections = make([]sgxepcsection, 0)
  773. for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
  774. eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
  775. leafType := eax & 0xf
  776. if leafType == 0 {
  777. // Invalid subleaf, stop iterating
  778. break
  779. } else if leafType == 1 {
  780. // EPC Section subleaf
  781. baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
  782. size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
  783. section := sgxepcsection{baseaddress: baseAddress, epcsize: size}
  784. rval.epcsections = append(rval.epcsections, section)
  785. }
  786. }
  787. return
  788. }
  789. func support() flags {
  790. mfi := maxFunctionID()
  791. vend := vendorID()
  792. if mfi < 0x1 {
  793. return 0
  794. }
  795. rval := uint64(0)
  796. _, _, c, d := cpuid(1)
  797. if (d & (1 << 15)) != 0 {
  798. rval |= cmov
  799. }
  800. if (d & (1 << 23)) != 0 {
  801. rval |= mmx
  802. }
  803. if (d & (1 << 25)) != 0 {
  804. rval |= mmxext
  805. }
  806. if (d & (1 << 25)) != 0 {
  807. rval |= sse
  808. }
  809. if (d & (1 << 26)) != 0 {
  810. rval |= sse2
  811. }
  812. if (c & 1) != 0 {
  813. rval |= sse3
  814. }
  815. if (c & (1 << 5)) != 0 {
  816. rval |= vmx
  817. }
  818. if (c & 0x00000200) != 0 {
  819. rval |= ssse3
  820. }
  821. if (c & 0x00080000) != 0 {
  822. rval |= sse4
  823. }
  824. if (c & 0x00100000) != 0 {
  825. rval |= sse42
  826. }
  827. if (c & (1 << 25)) != 0 {
  828. rval |= aesni
  829. }
  830. if (c & (1 << 1)) != 0 {
  831. rval |= clmul
  832. }
  833. if c&(1<<23) != 0 {
  834. rval |= popcnt
  835. }
  836. if c&(1<<30) != 0 {
  837. rval |= rdrand
  838. }
  839. if c&(1<<29) != 0 {
  840. rval |= f16c
  841. }
  842. if c&(1<<13) != 0 {
  843. rval |= cx16
  844. }
  845. if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
  846. if threadsPerCore() > 1 {
  847. rval |= htt
  848. }
  849. }
  850. // Check XGETBV, OXSAVE and AVX bits
  851. if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
  852. // Check for OS support
  853. eax, _ := xgetbv(0)
  854. if (eax & 0x6) == 0x6 {
  855. rval |= avx
  856. if (c & 0x00001000) != 0 {
  857. rval |= fma3
  858. }
  859. }
  860. }
  861. // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  862. if mfi >= 7 {
  863. _, ebx, ecx, edx := cpuidex(7, 0)
  864. eax1, _, _, _ := cpuidex(7, 1)
  865. if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
  866. rval |= avx2
  867. }
  868. if (ebx & 0x00000008) != 0 {
  869. rval |= bmi1
  870. if (ebx & 0x00000100) != 0 {
  871. rval |= bmi2
  872. }
  873. }
  874. if ebx&(1<<2) != 0 {
  875. rval |= sgx
  876. }
  877. if ebx&(1<<4) != 0 {
  878. rval |= hle
  879. }
  880. if ebx&(1<<9) != 0 {
  881. rval |= erms
  882. }
  883. if ebx&(1<<11) != 0 {
  884. rval |= rtm
  885. }
  886. if ebx&(1<<14) != 0 {
  887. rval |= mpx
  888. }
  889. if ebx&(1<<18) != 0 {
  890. rval |= rdseed
  891. }
  892. if ebx&(1<<19) != 0 {
  893. rval |= adx
  894. }
  895. if ebx&(1<<29) != 0 {
  896. rval |= sha
  897. }
  898. if edx&(1<<26) != 0 {
  899. rval |= ibpb
  900. }
  901. if ecx&(1<<30) != 0 {
  902. rval |= sgxlc
  903. }
  904. if edx&(1<<27) != 0 {
  905. rval |= stibp
  906. }
  907. // Only detect AVX-512 features if XGETBV is supported
  908. if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  909. // Check for OS support
  910. eax, _ := xgetbv(0)
  911. // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  912. // ZMM16-ZMM31 state are enabled by OS)
  913. /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  914. if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
  915. if ebx&(1<<16) != 0 {
  916. rval |= avx512f
  917. }
  918. if ebx&(1<<17) != 0 {
  919. rval |= avx512dq
  920. }
  921. if ebx&(1<<21) != 0 {
  922. rval |= avx512ifma
  923. }
  924. if ebx&(1<<26) != 0 {
  925. rval |= avx512pf
  926. }
  927. if ebx&(1<<27) != 0 {
  928. rval |= avx512er
  929. }
  930. if ebx&(1<<28) != 0 {
  931. rval |= avx512cd
  932. }
  933. if ebx&(1<<30) != 0 {
  934. rval |= avx512bw
  935. }
  936. if ebx&(1<<31) != 0 {
  937. rval |= avx512vl
  938. }
  939. // ecx
  940. if ecx&(1<<1) != 0 {
  941. rval |= avx512vbmi
  942. }
  943. if ecx&(1<<6) != 0 {
  944. rval |= avx512vbmi2
  945. }
  946. if ecx&(1<<8) != 0 {
  947. rval |= gfni
  948. }
  949. if ecx&(1<<9) != 0 {
  950. rval |= vaes
  951. }
  952. if ecx&(1<<10) != 0 {
  953. rval |= vpclmulqdq
  954. }
  955. if ecx&(1<<11) != 0 {
  956. rval |= avx512vnni
  957. }
  958. if ecx&(1<<12) != 0 {
  959. rval |= avx512bitalg
  960. }
  961. if ecx&(1<<14) != 0 {
  962. rval |= avx512vpopcntdq
  963. }
  964. // edx
  965. if edx&(1<<8) != 0 {
  966. rval |= avx512vp2intersect
  967. }
  968. // cpuid eax 07h,ecx=1
  969. if eax1&(1<<5) != 0 {
  970. rval |= avx512bf16
  971. }
  972. }
  973. }
  974. }
  975. if maxExtendedFunction() >= 0x80000001 {
  976. _, _, c, d := cpuid(0x80000001)
  977. if (c & (1 << 5)) != 0 {
  978. rval |= lzcnt
  979. rval |= popcnt
  980. }
  981. if (d & (1 << 31)) != 0 {
  982. rval |= amd3dnow
  983. }
  984. if (d & (1 << 30)) != 0 {
  985. rval |= amd3dnowext
  986. }
  987. if (d & (1 << 23)) != 0 {
  988. rval |= mmx
  989. }
  990. if (d & (1 << 22)) != 0 {
  991. rval |= mmxext
  992. }
  993. if (c & (1 << 6)) != 0 {
  994. rval |= sse4a
  995. }
  996. if d&(1<<20) != 0 {
  997. rval |= nx
  998. }
  999. if d&(1<<27) != 0 {
  1000. rval |= rdtscp
  1001. }
  1002. /* Allow for selectively disabling SSE2 functions on AMD processors
  1003. with SSE2 support but not SSE4a. This includes Athlon64, some
  1004. Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
  1005. than SSE2 often enough to utilize this special-case flag.
  1006. AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
  1007. so that SSE2 is used unless explicitly disabled by checking
  1008. AV_CPU_FLAG_SSE2SLOW. */
  1009. if vendorID() != intel &&
  1010. rval&sse2 != 0 && (c&0x00000040) == 0 {
  1011. rval |= sse2slow
  1012. }
  1013. /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  1014. * used unless the OS has AVX support. */
  1015. if (rval & avx) != 0 {
  1016. if (c & 0x00000800) != 0 {
  1017. rval |= xop
  1018. }
  1019. if (c & 0x00010000) != 0 {
  1020. rval |= fma4
  1021. }
  1022. }
  1023. if vendorID() == intel {
  1024. family, model := familyModel()
  1025. if family == 6 && (model == 9 || model == 13 || model == 14) {
  1026. /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
  1027. * 6/14 (core1 "yonah") theoretically support sse2, but it's
  1028. * usually slower than mmx. */
  1029. if (rval & sse2) != 0 {
  1030. rval |= sse2slow
  1031. }
  1032. if (rval & sse3) != 0 {
  1033. rval |= sse3slow
  1034. }
  1035. }
  1036. /* The Atom processor has SSSE3 support, which is useful in many cases,
  1037. * but sometimes the SSSE3 version is slower than the SSE2 equivalent
  1038. * on the Atom, but is generally faster on other processors supporting
  1039. * SSSE3. This flag allows for selectively disabling certain SSSE3
  1040. * functions on the Atom. */
  1041. if family == 6 && model == 28 {
  1042. rval |= atom
  1043. }
  1044. }
  1045. }
  1046. return flags(rval)
  1047. }
  1048. func valAsString(values ...uint32) []byte {
  1049. r := make([]byte, 4*len(values))
  1050. for i, v := range values {
  1051. dst := r[i*4:]
  1052. dst[0] = byte(v & 0xff)
  1053. dst[1] = byte((v >> 8) & 0xff)
  1054. dst[2] = byte((v >> 16) & 0xff)
  1055. dst[3] = byte((v >> 24) & 0xff)
  1056. switch {
  1057. case dst[0] == 0:
  1058. return r[:i*4]
  1059. case dst[1] == 0:
  1060. return r[:i*4+1]
  1061. case dst[2] == 0:
  1062. return r[:i*4+2]
  1063. case dst[3] == 0:
  1064. return r[:i*4+3]
  1065. }
  1066. }
  1067. return r
  1068. }