// Allocate the operands as []float64 in CPU memory: a has shape (2, 3) and
// b has shape (3).
a := tensor.New(tensor.Of(tensor.Float64), tensor.WithShape(2, 3))
b := tensor.New(tensor.Of(tensor.Float64), tensor.WithShape(3))

// Set the engine AFTER the values have been allocated.
e := newEngine(cudaCtx)
tensor.WithEngine(e)(a)
tensor.WithEngine(e)(b)

// Fill both operands with 1, 2, 3, ... through their backing slices.
ad := a.Data().([]float64)
for i := range ad {
	ad[i] = float64(i + 1)
}
bd := b.Data().([]float64)
for i := range bd {
	bd[i] = float64(i + 1)
}

// Invoke the engine directly, writing the product into c.
// NOTE(review): c is not declared in this snippet; it is assumed to be a
// tensor created by the surrounding code — confirm.
// Alternative through the package-level API:
// c2, err = tensor.MatVecMul(a, b, tensor.WithReuse(c))
var err error
err = e.MatVecMul(a, b, c)

// Report each failure source separately so a nil error is never logged
// alongside a real one, and read the engine's error state only once.
failed := false
if err != nil {
	log.Println(err)
	failed = true
}
if engineErr := e.Standard.Err(); engineErr != nil {
	fmt.Println(engineErr)
	failed = true
}
if !failed {
	fmt.Printf("c %v\n", c.Data())
}
⎡1 2 3⎤
⎣4 5 6⎦
[1 2 3]
c [1000 1000]
2018/07/14 11:50:56 impl.e <nil>