Skip to content

Commit 4a6f582

Browse files
xiang1guo authored and TaoLv committed
gtest: graph: unit: test different constant weight tensor cache
1 parent 1522291 commit 4a6f582

File tree

1 file changed

+166
-8
lines changed

1 file changed

+166
-8
lines changed

tests/gtests/graph/unit/backend/dnnl/test_matmul.cpp

+166-8
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "graph/unit/utils.hpp"
2727

2828
#include "backend/dnnl/dnnl_constant_tensor_cache.hpp"
29+
#include "oneapi/dnnl/dnnl_graph.hpp"
2930

3031
namespace graph = dnnl::impl::graph;
3132
namespace utils = dnnl::graph::tests::unit::utils;
@@ -7968,6 +7969,19 @@ TEST(test_matmul_execute_subgraph_int8, ShareCachedWeight) {
79687969
std::vector<std::vector<int64_t>> dst_shapes {{4, 4096}, {32, 4096}};
79697970
std::vector<std::shared_ptr<graph::compiled_partition_t>> cps;
79707971
size_t prv_cache_size = 0;
7972+
7973+
// Randomly generate src, weight and bias data; random seed = 7.
7974+
// Weight tensor keeps same for different src.
7975+
std::default_random_engine generator(7);
7976+
std::uniform_real_distribution<float> s8_distribution(-127.0f, 128.0f);
7977+
std::vector<int8_t> weight_data(product(weight_shape));
7978+
std::generate(weight_data.begin(), weight_data.end(),
7979+
[&]() { return static_cast<int8_t>(s8_distribution(generator)); });
7980+
test_tensor weight_s8_ts(weight_s8, engine, weight_data);
7981+
7982+
// set constant tensor cache capacity as 1GB
7983+
dnnl::graph::set_constant_tensor_cache_capacity(
7984+
static_cast<engine::kind>(engine->kind()), 1024);
79717985
for (size_t i = 0; i < src_shapes.size(); ++i) {
79727986
std::vector<int64_t> src_shape = src_shapes[i];
79737987
std::vector<int64_t> dst_shape = dst_shapes[i];
@@ -7988,23 +8002,162 @@ TEST(test_matmul_execute_subgraph_int8, ShareCachedWeight) {
79888002
cp.query_logical_tensor(dst_s8.id, &compiled_output);
79898003

79908004
std::vector<uint8_t> src_data(product(src_shape));
7991-
std::vector<int8_t> weight_data(product(weight_shape));
79928005

7993-
// random generate src, weight and bias data random seed = 7
7994-
std::default_random_engine generator(7);
79958006
std::uniform_real_distribution<float> u8_distribution(0.0f, 255.0f);
7996-
std::uniform_real_distribution<float> s8_distribution(-127.0f, 128.0f);
79978007
std::generate(src_data.begin(), src_data.end(), [&]() {
79988008
return static_cast<uint8_t>(u8_distribution(generator));
79998009
});
8010+
8011+
test_tensor src_u8_ts(src_u8, engine, src_data);
8012+
test_tensor dst_s8_ts(compiled_output, engine);
8013+
ASSERT_EQ(cp.execute(strm, {src_u8_ts.get(), weight_s8_ts.get()},
8014+
{dst_s8_ts.get()}),
8015+
graph::status::success);
8016+
8017+
size_t curr_cache_size = graph::get_constant_tensor_cache(
8018+
engine->kind(), engine->index())
8019+
->get_size();
8020+
if (i != 0) {
8021+
// cache size should not change since no new weight cached
8022+
ASSERT_EQ(prv_cache_size, curr_cache_size);
8023+
}
8024+
prv_cache_size = curr_cache_size;
8025+
8026+
strm->wait();
8027+
}
8028+
// Reset constant tensor cache capacity as 0
8029+
dnnl::graph::set_constant_tensor_cache_capacity(
8030+
static_cast<engine::kind>(engine->kind()), 0);
8031+
}
8032+
8033+
TEST(test_matmul_execute_subgraph_int8, NoShareCachedWeight) {
8034+
graph::engine_t *engine = get_engine();
8035+
graph::stream_t *strm = get_stream();
8036+
std::string qtype = "per_channel";
8037+
8038+
std::vector<int64_t> weight_shape = {1024, 1024};
8039+
8040+
float scale_src = 1 / 255.f; // map to 0~255
8041+
float scale_out = 1;
8042+
int64_t zp_src = 0;
8043+
int64_t zp_out = engine->kind() == graph::engine_kind::gpu ? 0 : 78;
8044+
8045+
size_t scales_wei_sizes = weight_shape.back();
8046+
std::vector<float> scale_wei(scales_wei_sizes, 1 / 127.f);
8047+
std::vector<int64_t> zp_wei(scales_wei_sizes, 0);
8048+
8049+
graph::op_t dqdata_op(1, graph::op_kind::Dequantize, "dqdata_op");
8050+
dqdata_op.set_attr<std::string>(graph::op_attr::qtype, "per_tensor");
8051+
dqdata_op.set_attr<std::vector<int64_t>>(graph::op_attr::zps, {zp_src});
8052+
dqdata_op.set_attr<std::vector<float>>(graph::op_attr::scales, {scale_src});
8053+
dqdata_op.set_attr<int64_t>(graph::op_attr::axis, 0);
8054+
8055+
graph::op_t dqweight_op(2, graph::op_kind::Dequantize, "dqweight_op");
8056+
dqweight_op.set_attr<std::string>(graph::op_attr::qtype, "per_channel");
8057+
dqweight_op.set_attr<std::vector<int64_t>>(graph::op_attr::zps, zp_wei);
8058+
dqweight_op.set_attr<std::vector<float>>(graph::op_attr::scales, scale_wei);
8059+
dqweight_op.set_attr<int64_t>(graph::op_attr::axis, 1);
8060+
8061+
graph::op_t matmul_op(3, graph::op_kind::MatMul, "matmul_op");
8062+
matmul_op.set_attr<bool>(graph::op_attr::transpose_a, false);
8063+
matmul_op.set_attr<bool>(graph::op_attr::transpose_b, false);
8064+
8065+
graph::op_t qout_op(4, graph::op_kind::Quantize, "qout_op");
8066+
qout_op.set_attr<std::string>(graph::op_attr::qtype, "per_tensor");
8067+
qout_op.set_attr<std::vector<int64_t>>(graph::op_attr::zps, {zp_out});
8068+
qout_op.set_attr<std::vector<float>>(graph::op_attr::scales, {scale_out});
8069+
qout_op.set_attr<int64_t>(graph::op_attr::axis, 0);
8070+
8071+
// prepare logical tensor
8072+
auto src_u8 = utils::logical_tensor_init(1, graph::data_type::u8);
8073+
auto src_f32_dq = utils::logical_tensor_init(2, graph::data_type::f32);
8074+
auto weight_s8
8075+
= utils::logical_tensor_init(4, weight_shape, graph::data_type::s8);
8076+
weight_s8.property = graph::property_type::constant;
8077+
auto weight_f32_dq = utils::logical_tensor_init(
8078+
5, weight_shape, graph::data_type::f32);
8079+
auto dst_f32 = utils::logical_tensor_init(7, graph::data_type::f32);
8080+
auto dst_s8 = utils::logical_tensor_init(8, graph::data_type::s8);
8081+
8082+
dqdata_op.add_input(src_u8);
8083+
dqdata_op.add_output(src_f32_dq);
8084+
8085+
dqweight_op.add_input(weight_s8);
8086+
dqweight_op.add_output(weight_f32_dq);
8087+
8088+
matmul_op.add_input(src_f32_dq);
8089+
matmul_op.add_input(weight_f32_dq);
8090+
matmul_op.add_output(dst_f32);
8091+
8092+
qout_op.add_input(dst_f32);
8093+
qout_op.add_output(dst_s8);
8094+
8095+
graph::graph_t g(engine->kind());
8096+
g.add_op(&dqdata_op);
8097+
g.add_op(&dqweight_op);
8098+
g.add_op(&matmul_op);
8099+
g.add_op(&qout_op);
8100+
g.finalize();
8101+
8102+
graph::pass::pass_base_ptr apass = get_pass("x8x8x_matmul_post_ops");
8103+
apass->run(g);
8104+
ASSERT_EQ(g.get_num_partitions(), 1U);
8105+
auto part = g.get_partitions()[0];
8106+
ASSERT_EQ(part->get_ops().size(), 4U);
8107+
8108+
graph::partition_t p;
8109+
p.init(part);
8110+
8111+
std::vector<std::vector<int64_t>> src_shapes {{4, 1024}, {32, 1024}};
8112+
std::vector<std::vector<int64_t>> dst_shapes {{4, 1024}, {32, 1024}};
8113+
std::vector<std::shared_ptr<graph::compiled_partition_t>> cps;
8114+
size_t prv_cache_size = 0;
8115+
8116+
// Set constant tensor cache capacity as 1GB
8117+
dnnl::graph::set_constant_tensor_cache_capacity(
8118+
static_cast<engine::kind>(engine->kind()), 1024);
8119+
8120+
std::default_random_engine generator(7);
8121+
std::uniform_real_distribution<float> s8_distribution(-127.0f, 128.0f);
8122+
std::vector<int8_t> weight_data(product(weight_shape));
8123+
// Construct different weight tensor objects with different memory addresses.
8124+
std::vector<test_tensor> weight_s8_ts_vec;
8125+
for (size_t i = 0; i < src_shapes.size(); i++) {
80008126
std::generate(weight_data.begin(), weight_data.end(), [&]() {
80018127
return static_cast<int8_t>(s8_distribution(generator));
80028128
});
8129+
weight_s8_ts_vec.emplace_back(
8130+
test_tensor(weight_s8, engine, weight_data));
8131+
}
8132+
8133+
for (size_t i = 0; i < src_shapes.size(); ++i) {
8134+
std::vector<int64_t> src_shape = src_shapes[i];
8135+
std::vector<int64_t> dst_shape = dst_shapes[i];
8136+
8137+
src_u8 = utils::logical_tensor_init(1, src_shape, graph::data_type::u8);
8138+
dst_s8 = utils::logical_tensor_init(8, dst_shape, graph::data_type::s8);
8139+
std::vector<const graph::logical_tensor_t *> lt_ins {
8140+
&src_u8, &weight_s8};
8141+
std::vector<const graph::logical_tensor_t *> lt_outs {&dst_s8};
8142+
8143+
cps.push_back(std::make_shared<graph::compiled_partition_t>(p));
8144+
auto &cp = *cps.back();
8145+
8146+
ASSERT_EQ(p.compile(&cp, lt_ins, lt_outs, engine),
8147+
graph::status::success);
8148+
8149+
graph::logical_tensor_t compiled_output;
8150+
cp.query_logical_tensor(dst_s8.id, &compiled_output);
8151+
8152+
std::vector<uint8_t> src_data(product(src_shape));
8153+
std::uniform_real_distribution<float> u8_distribution(0.0f, 255.0f);
8154+
std::generate(src_data.begin(), src_data.end(), [&]() {
8155+
return static_cast<uint8_t>(u8_distribution(generator));
8156+
});
80038157

80048158
test_tensor src_u8_ts(src_u8, engine, src_data);
8005-
test_tensor weight_s8_ts(weight_s8, engine, weight_data);
80068159
test_tensor dst_s8_ts(compiled_output, engine);
8007-
ASSERT_EQ(cp.execute(strm, {src_u8_ts.get(), weight_s8_ts.get()},
8160+
ASSERT_EQ(cp.execute(strm, {src_u8_ts.get(), weight_s8_ts_vec[i].get()},
80088161
{dst_s8_ts.get()}),
80098162
graph::status::success);
80108163

@@ -8013,11 +8166,16 @@ TEST(test_matmul_execute_subgraph_int8, ShareCachedWeight) {
80138166
->get_size();
80148167

80158168
if (i != 0) {
8016-
// cache size should not change since no new weight cached
8017-
ASSERT_EQ(prv_cache_size, curr_cache_size);
8169+
// cache size changes since new weight tensor with different address
8170+
// will be cached
8171+
ASSERT_NE(prv_cache_size, curr_cache_size);
80188172
}
80198173
prv_cache_size = curr_cache_size;
80208174

80218175
strm->wait();
80228176
}
8177+
8178+
// Reset constant tensor cache capacity as 0
8179+
dnnl::graph::set_constant_tensor_cache_capacity(
8180+
static_cast<engine::kind>(engine->kind()), 0);
80238181
}

0 commit comments

Comments
 (0)