#!/bin/awk -f
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <[email protected]>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
  # Sanity check of the running awk implementation.
  # Returns "" when awk behaves as expected, otherwise a message describing
  # the problem that was detected.
  function check_awk_implement() {
      # Zero formatted as hexadecimal must come out as the single digit "0".
      return (sprintf("%x", 0) == "0") ? "" : "Your awk has a printf-format problem."
  }
  # Reset the working state that is used while one table is being collected.
  function clear_vars() {
      # No table is open: empty name, and -1 marks every id as "not set".
      tname = ""
      aid = -1    # AVX id
      gid = -1    # group id
      eid = -1    # escape id
      # Forget the attributes gathered for the plain table and for its
      # three last-prefix variants.
      delete lptable3
      delete lptable2
      delete lptable1
      delete table
  }
  # Start-up: verify the awk implementation, print the generated-file header
  # and set up the counters, lookup tables and regular expressions used by
  # the parsing rules.
  BEGIN {
      # Implementation error checking
      awkchecked = check_awk_implement()
      if (awkchecked != "") {
          print "Error: " awkchecked > "/dev/stderr"
          print "Please try to use gawk." > "/dev/stderr"
          exit 1
      }

      # Setup generating tables
      print "/* x86 opcode map generated from x86-opcode-map.txt */"
      print "/* Do not change this code. */\n"
      ggid = 1    # next group id to hand out
      geid = 1    # next escape id to hand out
      gaid = 0    # one more than the highest AVX id seen
      delete etable
      delete gtable
      delete atable

      # Patterns used to classify the fields of an opcode line
      opnd_expr = "^[A-Za-z/]"
      ext_expr = "^\\("
      sep_expr = "^\\|$"
      group_expr = "^Grp[0-9A-Za-z]+"

      # Immediate operands and the attribute each one produces
      imm_expr = "^[IJAOL][a-z]"
      imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
      imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
      imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
      imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
      imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
      imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
      imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
      imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
      imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
      imm_flag["Ob"] = "INAT_MOFFSET"
      imm_flag["Ov"] = "INAT_MOFFSET"
      imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

      modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
      force64_expr = "\\([df]64\\)"
      rex_expr = "^REX(\\.[XRWB]+)*"
      fpu_expr = "^ESC" # TODO

      # Last-prefix superscripts. '&' is an ordinary regexp character, so it
      # is written without a backslash: "\&" is not a defined regexp escape
      # and makes gawk emit a warning.
      lprefix1_expr = "\\((66|!F3)\\)"
      lprefix2_expr = "\\(F3\\)"
      lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
      lprefix_expr = "\\((66|F2|F3)\\)"
      max_lprefix = 4

      # All opcodes starting with lower-case 'v', 'k' or carrying the (v1)
      # superscript accept the VEX prefix
      vexok_opcode_expr = "^[vk].*"
      vexok_expr = "\\(v1\\)"
      # All opcodes with the (v) superscript support *only* the VEX prefix
      vexonly_expr = "\\(v\\)"
      # All opcodes with the (ev) superscript support *only* the EVEX prefix
      evexonly_expr = "\\(ev\\)"

      # Prefix names and the attribute each one produces
      prefix_expr = "\\(Prefix\\)"
      prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
      prefix_num["REPNE"] = "INAT_PFX_REPNE"
      prefix_num["REP/REPE"] = "INAT_PFX_REPE"
      prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
      prefix_num["XRELEASE"] = "INAT_PFX_REPE"
      prefix_num["LOCK"] = "INAT_PFX_LOCK"
      prefix_num["SEG=CS"] = "INAT_PFX_CS"
      prefix_num["SEG=DS"] = "INAT_PFX_DS"
      prefix_num["SEG=ES"] = "INAT_PFX_ES"
      prefix_num["SEG=FS"] = "INAT_PFX_FS"
      prefix_num["SEG=GS"] = "INAT_PFX_GS"
      prefix_num["SEG=SS"] = "INAT_PFX_SS"
      prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
      prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
      prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
      prefix_num["EVEX"] = "INAT_PFX_EVEX"

      clear_vars()
  }
  # Report a problem found in the input, tagged with the current record
  # number, on /dev/stderr and leave awk with exit status 1.
  function semantic_error(msg) {
      printf("Semantic error at %d: %s\n", NR, msg) > "/dev/stderr"
      exit 1
  }
  # Print msg on standard output with a "DEBUG: " tag in front of it.
  function debug(msg) {
      printf("DEBUG: %s\n", msg)
  }
  # Return the number of elements stored in the array arr.
  # key and total are local variables, not arguments.
  function array_size(arr,    key, total) {
      total = 0
      for (key in arr)
          total += 1
      return total
  }
  # "Table:" line: echo it into the output as a C comment and fail when a
  # table name is still set, i.e. the previous table was not ended.
  /^Table:/ {
      printf("/* %s */\n", $0)
      if (length(tname) > 0)
          semantic_error("Hit Table: before EndTable:.")
  }
  # "Referrer:" line: when a name follows the keyword, the table being read
  # is an escape opcode table. Look up the escape id registered under that
  # name and derive the table name from it.
  /^Referrer:/ {
      if (NF > 1) {
          # the lookup key is every field after the keyword, joined
          # without separators
          ref = ""
          i = 2
          while (i <= NF) {
              ref = ref $i
              i++
          }
          eid = escape[ref]
          tname = "inat_escape_table_" sprintf("%d", eid)
      }
  }
  # "AVXcode:" line: record the AVX id when one is given and settle the
  # table name for AVX-only and primary tables.
  /^AVXcode:/ {
      if (NF > 1) {
          # AVX/escape opcode table
          aid = $2
          # keep gaid one above the largest AVX id seen so far
          if (aid >= gaid)
              gaid = aid + 1
          # no name chosen yet: this is an AVX only opcode table
          if (length(tname) == 0)
              tname = "inat_avx_table_" sprintf("%d", $2)
      }
      # neither an escape id nor an AVX id: primary opcode table
      if (eid == -1 && aid == -1)
          tname = "inat_primary_table"
  }
  # "GrpTable:" line: echo it as a C comment, make sure the group named in
  # the second field has already been registered, then select that group's
  # id and table name.
  /^GrpTable:/ {
      printf("/* %s */\n", $0)
      if (!($2 in group))
          semantic_error("No group: " $2 )
      gid = group[$2]
      tname = sprintf("inat_group_table_%d", gid)
  }
  # Print one attribute table as a C array definition with designated
  # initializers.
  #   tbl  - array of attribute strings, keyed by the formatted index
  #   name - C declarator printed after "const insn_attr_t "
  #   fmt  - sprintf format that turns a slot number into a key of tbl
  #   n    - number of slots to scan (0 .. n-1)
  # i and id are local variables, so the caller's globals of the same name
  # are left untouched. Only slots holding a non-empty value are printed, and
  # probing a slot never creates a new element in tbl.
  function print_table(tbl,name,fmt,n,    i,id)
  {
      print "const insn_attr_t " name " = {"
      for (i = 0; i < n; i++) {
          id = sprintf(fmt, i)
          if ((id in tbl) && tbl[id])
              print " [" id "] = " tbl[id] ","
      }
      print "};"
  }
  # Emit one variant of the table that is currently being collected and
  # remember its C name for the lookup arrays printed at the end.
  #   tbl - attribute array of the variant
  #   pfx - 0 for the plain table, 1..3 for the last-prefix variants
  # cname is a local variable. Nothing happens when tbl is empty.
  function emit_table_variant(tbl, pfx,    cname)
  {
      if (array_size(tbl) == 0)
          return
      # variants carry a "_<pfx>" suffix, the plain table has none
      cname = (pfx == 0) ? tname : (tname "_" pfx)
      if (gid != -1) {
          # group table
          print_table(tbl, cname "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
          gtable[gid,pfx] = cname
      } else {
          # primary/escaped table
          print_table(tbl, cname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
          etable[eid,pfx] = cname
          if (aid >= 0)
              atable[aid,pfx] = cname
      }
  }

  # "EndTable" line: print the collected table and its last-prefix variants,
  # followed by an empty line, then reset the working state.
  /^EndTable/ {
      emit_table_variant(table, 0)
      emit_table_variant(lptable1, 1)
      emit_table_variant(lptable2, 2)
      emit_table_variant(lptable3, 3)
      print ""
      clear_vars()
  }
  # Combine two attribute expressions.
  # Returns 'old " | " new' when both are set, otherwise whichever one is set
  # (or the empty value when neither is).
  function add_flags(old,new) {
      if (!old)
          return new
      if (!new)
          return old
      return old " | " new
  }
  # Convert an operand list to attribute flags.
  #   count - number of operands
  #   opnd  - operand names, indexed 1..count
  # k, name, immflags and modflag are local variables.
  # Returns the immediate attribute (if any) and "INAT_MODRM" (if any operand
  # matches modrm_expr), combined with add_flags().
  function convert_operands(count,opnd,    k,name,immflags,modflag)
  {
      # immflags and modflag start out empty, as all awk locals do
      for (k = 1; k <= count; k++) {
          name = opnd[k]
          if (match(name, imm_expr) != 1) {
              # not an immediate; check whether it is a ModRM operand
              if (match(name, modrm_expr))
                  modflag = "INAT_MODRM"
              continue
          }
          if (!imm_flag[name])
              semantic_error("Unknown imm opnd: " name)
          if (!immflags) {
              # first immediate operand
              immflags = imm_flag[name]
              continue
          }
          # a further immediate is accepted only when it is "Ib"
          if (name != "Ib")
              semantic_error("Second IMM error")
          immflags = add_flags(immflags, "INAT_SCNDIMM")
      }
      return add_flags(immflags, modflag)
  }
  # Opcode line: "<hex index>: <opcode> [operands] [(superscripts)] [| ...]".
  # Each '|'-separated alternative is converted to an attribute expression
  # and stored in table[] and/or the last-prefix tables lptable1..3[].
  # The ':' in the pattern is an ordinary character and is written without a
  # backslash ("\:" is not a defined regexp escape and makes gawk warn).
  /^[0-9a-f]+:/ {
      # the first input record is never treated as an opcode line
      if (NR == 1)
          next

      # get index
      idx = "0x" substr($1, 1, index($1,":") - 1)
      if (idx in table)
          semantic_error("Redefine " idx " in " tname)

      # check if escaped opcode
      if ("escape" == $2) {
          if ($3 != "#")
              semantic_error("No escaped name")
          # the escape name is everything after the '#', joined without
          # separators
          ref = ""
          for (i = 4; i <= NF; i++)
              ref = ref $i
          if (ref in escape)
              semantic_error("Redefine escape (" ref ")")
          escape[ref] = geid
          geid++
          table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
          next
      }

      variant = ""
      # converts
      i = 2
      while (i <= NF) {
          opcode = $(i++)
          delete opnds
          ext = ""
          flags = ""
          opnd = ""

          # parse one opcode
          if (match($i, opnd_expr)) {
              opnd = $i
              count = split($(i++), opnds, ",")
              flags = convert_operands(count, opnds)
          }
          if (match($i, ext_expr))
              ext = $(i++)
          if (match($i, sep_expr))
              i++
          else if (i < NF)
              semantic_error($i " is not a separator")

          # check if group opcode
          if (match(opcode, group_expr)) {
              # register the group the first time it is seen
              if (!(opcode in group)) {
                  group[opcode] = ggid
                  ggid++
              }
              flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
          }

          # check force(or default) 64bit
          if (match(ext, force64_expr))
              flags = add_flags(flags, "INAT_FORCE64")

          # check REX prefix
          if (match(opcode, rex_expr))
              flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

          # check coprocessor escape : TODO
          if (match(opcode, fpu_expr))
              flags = add_flags(flags, "INAT_MODRM")

          # check VEX codes
          if (match(ext, evexonly_expr))
              flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
          else if (match(ext, vexonly_expr))
              flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
          else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
              flags = add_flags(flags, "INAT_VEXOK")

          # check prefixes
          if (match(ext, prefix_expr)) {
              if (!prefix_num[opcode])
                  semantic_error("Unknown prefix: " opcode)
              flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
          }

          # nothing to record for this alternative
          if (length(flags) == 0)
              continue

          # check if last prefix
          if (match(ext, lprefix1_expr)) {
              lptable1[idx] = add_flags(lptable1[idx],flags)
              variant = "INAT_VARIANT"
          }
          if (match(ext, lprefix2_expr)) {
              lptable2[idx] = add_flags(lptable2[idx],flags)
              variant = "INAT_VARIANT"
          }
          if (match(ext, lprefix3_expr)) {
              lptable3[idx] = add_flags(lptable3[idx],flags)
              variant = "INAT_VARIANT"
          }
          # without a 66/F2/F3 superscript the flags go to the plain table
          if (!match(ext, lprefix_expr)){
              table[idx] = add_flags(table[idx],flags)
          }
      }
      # mark the plain entry when any last-prefix variant was recorded
      if (variant)
          table[idx] = add_flags(table[idx],variant)
  }
  # After all input: print the three arrays that map escape ids, group ids
  # and AVX ids (first index) and the last-prefix number (second index) to
  # the tables printed earlier. Nothing is printed when the awk sanity check
  # in BEGIN failed.
  END {
      if (awkchecked != "")
          exit 1

      # escape opcode map's array
      print "/* Escape opcode map array */"
      print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
      for (i = 0; i < geid; i++) {
          for (j = 0; j < max_lprefix; j++) {
              if (etable[i,j])
                  printf(" [%d][%d] = %s,\n", i, j, etable[i,j])
          }
      }
      print "};\n"

      # group opcode map's array
      print "/* Group opcode map array */"
      print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
      for (i = 0; i < ggid; i++) {
          for (j = 0; j < max_lprefix; j++) {
              if (gtable[i,j])
                  printf(" [%d][%d] = %s,\n", i, j, gtable[i,j])
          }
      }
      print "};\n"

      # AVX opcode map's array
      print "/* AVX opcode map array */"
      print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
      for (i = 0; i < gaid; i++) {
          for (j = 0; j < max_lprefix; j++) {
              if (atable[i,j])
                  printf(" [%d][%d] = %s,\n", i, j, atable[i,j])
          }
      }
      print "};"
  }