Skip to content

Commit

Permalink
cmd/compile/mips: intrinsify bits.RotateLeft32 on MIPS
Browse files Browse the repository at this point in the history
This CL implements the ROTR & ROTRV instructions for
MIPS and MIPS64, which are mips32r2 instructions.

Additionally, bits.RotateLeft32 is now an intrinsic and will be
rewritten to ROTR during the SSA phase.

This brings roughly a 65-70% throughput improvement for mipsle
code running Chacha20Poly1305 on an MT7688:

goos: linux
goarch: mipsle
pkg: golang.org/x/crypto/chacha20poly1305
name                         old time/op    new time/op    delta
Chacha20Poly1305/Open-16       56.2µs ±20%    38.5µs ±40%   -31.45%  (p=0.001 n=8+10)
Chacha20Poly1305/Seal-16       68.3µs ±49%    30.6µs ±13%   -55.14%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-64       67.5µs ±22%    37.8µs ±19%   -43.98%  (p=0.000 n=9+9)
Chacha20Poly1305/Seal-64       64.7µs ±10%    37.6µs ± 8%   -41.96%  (p=0.000 n=9+8)
Chacha20Poly1305/Open-256       151µs ±13%      89µs ±20%   -41.03%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-256       148µs ±19%      93µs ±35%   -37.15%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-1024      456µs ±16%     260µs ±23%   -42.95%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-1024      469µs ±14%     254µs ±15%   -45.88%  (p=0.000 n=10+9)
Chacha20Poly1305/Open-8192     3.59ms ±23%    1.94ms ±15%   -45.86%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-8192     3.47ms ±20%    2.03ms ±22%   -41.60%  (p=0.000 n=9+10)
Chacha20Poly1305/Open-16384    7.01ms ± 9%    4.22ms ±22%   -39.89%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-16384    7.43ms ±19%    4.23ms ±11%   -43.04%  (p=0.000 n=10+9)

name                         old speed      new speed      delta
Chacha20Poly1305/Open-16      258kB/s ±46%   431kB/s ±32%   +67.05%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-16      246kB/s ±35%   527kB/s ±13%  +114.23%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-64      927kB/s ±31%  1664kB/s ±22%   +79.50%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-64      993kB/s ±10%  1709kB/s ± 8%   +72.02%  (p=0.000 n=9+8)
Chacha20Poly1305/Open-256    1.70MB/s ±13%  2.90MB/s ±18%   +70.88%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-256    1.74MB/s ±17%  2.81MB/s ±28%   +61.16%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-1024   2.26MB/s ±15%  3.99MB/s ±20%   +76.38%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-1024   2.20MB/s ±13%  3.92MB/s ±32%   +78.82%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-8192   2.31MB/s ±19%  4.24MB/s ±14%   +83.72%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-8192   2.30MB/s ±29%  4.09MB/s ±19%   +77.66%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-16384  2.34MB/s ±10%  3.93MB/s ±19%   +68.04%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-16384  2.23MB/s ±17%  3.79MB/s ±23%   +70.00%  (p=0.000 n=10+10)

Fixes golang#39139
  • Loading branch information
stffabi committed Mar 15, 2021
1 parent 7bfe32f commit d438ffc
Show file tree
Hide file tree
Showing 13 changed files with 122 additions and 37 deletions.
6 changes: 4 additions & 2 deletions src/cmd/compile/internal/mips/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpMIPSMULD,
ssa.OpMIPSDIVF,
ssa.OpMIPSDIVD,
ssa.OpMIPSMUL:
ssa.OpMIPSMUL,
ssa.OpMIPSROTR:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
Expand Down Expand Up @@ -201,7 +202,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpMIPSSRLconst,
ssa.OpMIPSSRAconst,
ssa.OpMIPSSGTconst,
ssa.OpMIPSSGTUconst:
ssa.OpMIPSSGTUconst,
ssa.OpMIPSROTRconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
Expand Down
6 changes: 4 additions & 2 deletions src/cmd/compile/internal/mips64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpMIPS64MULF,
ssa.OpMIPS64MULD,
ssa.OpMIPS64DIVF,
ssa.OpMIPS64DIVD:
ssa.OpMIPS64DIVD,
ssa.OpMIPS64ROTR:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
Expand All @@ -195,7 +196,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpMIPS64SRLVconst,
ssa.OpMIPS64SRAVconst,
ssa.OpMIPS64SGTconst,
ssa.OpMIPS64SGTUconst:
ssa.OpMIPS64SGTUconst,
ssa.OpMIPS64ROTRconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
Expand Down
3 changes: 2 additions & 1 deletion src/cmd/compile/internal/ssa/gen/MIPS.rules
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@
// rotates
(RotateLeft8 <t> x (MOVWconst [c])) => (Or8 (Lsh8x32 <t> x (MOVWconst [c&7])) (Rsh8Ux32 <t> x (MOVWconst [-c&7])))
(RotateLeft16 <t> x (MOVWconst [c])) => (Or16 (Lsh16x32 <t> x (MOVWconst [c&15])) (Rsh16Ux32 <t> x (MOVWconst [-c&15])))
(RotateLeft32 <t> x (MOVWconst [c])) => (Or32 (Lsh32x32 <t> x (MOVWconst [c&31])) (Rsh32Ux32 <t> x (MOVWconst [-c&31])))
(RotateLeft32 x (MOVWconst [c])) => (ROTRconst x [-c&31])
(RotateLeft32 x y) => (ROTR x (NEG <y.Type> y))
(RotateLeft64 <t> x (MOVWconst [c])) => (Or64 (Lsh64x32 <t> x (MOVWconst [c&63])) (Rsh64Ux32 <t> x (MOVWconst [-c&63])))

// unary ops
Expand Down
3 changes: 2 additions & 1 deletion src/cmd/compile/internal/ssa/gen/MIPS64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@
// rotates
(RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
(RotateLeft16 <t> x (MOVVconst [c])) => (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15])))
(RotateLeft32 <t> x (MOVVconst [c])) => (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31])))
(RotateLeft32 x (MOVVconst [c])) => (ROTRconst x [int32(-c&31)])
(RotateLeft32 x y) => (ROTR x (NEGV <y.Type> y))
(RotateLeft64 <t> x (MOVVconst [c])) => (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63])))

// unary ops
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ func init() {
{name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned
{name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"}, // arg0 >> arg1, signed, shift amount is mod 64
{name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed
{name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits
{name: "ROTRconst", argLength: 1, reg: gp11, asm: "ROTR", aux: "Int32"}, // arg0 right rotate by auxInt bits

// comparisons
{name: "SGT", argLength: 2, reg: gp21, asm: "SGT", typ: "Bool"}, // 1 if arg0 > arg1 (signed), 0 otherwise
Expand Down
3 changes: 3 additions & 0 deletions src/cmd/compile/internal/ssa/gen/MIPSOps.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ func init() {
{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> arg1, signed, shift amount is mod 32
{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed, shift amount must be 0 through 31 inclusive

{name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits
{name: "ROTRconst", argLength: 1, reg: gp11, asm: "ROTR", aux: "Int32"}, // arg0 right rotate by auxInt bits

{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"},

// comparisons
Expand Down
60 changes: 60 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 16 additions & 15 deletions src/cmd/compile/internal/ssa/rewriteMIPS.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 16 additions & 15 deletions src/cmd/compile/internal/ssa/rewriteMIPS64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/cmd/compile/internal/ssagen/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -4394,7 +4394,7 @@ func InitTables() {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1])
},
sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm, sys.MIPS, sys.MIPS64)
addF("math/bits", "RotateLeft64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1])
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/mips/a.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ const (
AREM
AREMU
ARFE
AROTR
ASC
ASCV
ASGT
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/mips/anames.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions src/cmd/internal/obj/mips/asm0.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ var optab = []Optab{
{ASLLV, C_REG, C_REG, C_REG, 9, 4, 0, sys.MIPS64, 0},
{ACLO, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},

{AROTR, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
{AROTR, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0},
{AROTR, C_SCON, C_REG, C_REG, 16, 4, 0, 0, 0},
{AROTR, C_SCON, C_NONE, C_REG, 16, 4, 0, 0, 0},

{AADDF, C_FREG, C_NONE, C_FREG, 32, 4, 0, 0, 0},
{AADDF, C_FREG, C_REG, C_FREG, 32, 4, 0, 0, 0},
{ACMPEQF, C_FREG, C_REG, C_NONE, 32, 4, 0, 0, 0},
Expand Down Expand Up @@ -1079,6 +1084,7 @@ func buildop(ctxt *obj.Link) {
ANEGW,
ANEGV,
AWORD,
AROTR,
obj.ANOP,
obj.ATEXT,
obj.AUNDEF,
Expand Down Expand Up @@ -1730,6 +1736,8 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return OP(0, 4)
case ASRL:
return OP(0, 6)
case AROTR:
return OP(0, 6) | (1 << 6)
case ASRA:
return OP(0, 7)
case ASLLV:
Expand Down Expand Up @@ -1914,6 +1922,8 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
return OP(0, 0)
case ASRL:
return OP(0, 2)
case AROTR:
return OP(0, 2) | (1 << 21)
case ASRA:
return OP(0, 3)
case AADDV:
Expand Down

0 comments on commit d438ffc

Please sign in to comment.