在ORACLE中的分析函数都是基于某几个字段划分计算窗口然后在窗口内进行聚合排名等等计算我想如果我们数据表的hash分区字段与分析函数中的partition by 字段一致的时候应该可以大大加快分析函数的运行效率因为每个分区上的数据可以单独进行运算互不干涉下面试验来验证我的想法
第一步创建一个分区表和普通表表结构与DBA_OBJECTS一致
create table t_partition_hash(
object_name varchar()
subobject_name varchar()
object_id number
data_object_id number
object_type varchar()
created date
last_ddl_time date
timestamp varchar()
status varchar()
temporary varchar()
generated varchar()
secondary varchar()
)
partition by hash(object_type)(
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
partition t_hash_p tablespace USERS
);
create table t_big_hash(
object_name varchar()
subobject_name varchar()
object_id number
data_object_id number
object_type varchar()
created date
last_ddl_time date
timestamp varchar()
status varchar()
temporary varchar()
generated varchar()
secondary varchar()
);
第二步准备数据从dba_object中把数据插入到两个表总共插入数据
insert into t_partition_hash select * from dba_objects;
insert into t_partition_hash select * from dba_objects;
第三步本采用RANK函数对两个表进行查询
begin
insert into t_rank
select object_id
rank() over (partition by object_type order by object_id) r_object_id
rank() over (partition by object_type order by subobject_name) r_subobject_name
rank() over (partition by object_type order by created) r_created
rank() over (partition by object_type order by last_ddl_time) r_last_ddl_time
rank() over (partition by object_type order by status) r_object_type
from t_partition_hash;
end;
使用hash分区表总共执行次的运行时间分别为sss s s
begin
insert into t_rank
select object_id
rank() over (partition by object_type order by object_id) r_object_id
rank() over (partition by object_type order by subobject_name) r_subobject_name
rank() over (partition by object_type order by created) r_created
rank() over (partition by object_type order by last_ddl_time) r_last_ddl_time
rank() over (partition by object_type order by status) r_object_type
from t_big_table;
end;
使用非分区表执行次的执行时间分别为sssss
由此可见采用有效的HASH分区表可以有效提升分析函数在oracle中的执行效率我相信随着数据量的增加将会有更明显的效果回头再测试一个项目中遇到的类似问题