大数据量分页存储过程效率测试附代码

pcajax

浏览: 2161533 次
性别:
来自: 上海

最近访客更多访客>>

morelily

zcm1205

知丿觉

u012363178

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

软件测试 SQL SQL Server 互联网 Go

在项目中，我们经常遇到或用到分页，那么在大数据量（百万级以上）下，哪种分页算法效率最优呢？我们不妨用事实说话。

测试环境

硬件：CPU 酷睿双核T5750 内存：2G

软件:Windows server 2003 + Sql server 2005

OK,我们首先创建一数据库：data_Test,并在此数据库中创建一表：tb_TestTable

1

-- Create the benchmark database and make it the current database.
CREATE DATABASE data_Test
GO

USE data_Test
GO

-- Table that the paging procedures are benchmarked against.
-- id is an auto-incrementing primary key; the remaining columns are filler data.
CREATE TABLE tb_TestTable
(
    id        INT IDENTITY(1,1) PRIMARY KEY,
    userName  NVARCHAR(20) NOT NULL,
    userPWD   NVARCHAR(20) NOT NULL,
    userEmail NVARCHAR(40) NULL
)
12

然后我们在数据表中插入2000000条数据:

1

-- Populate tb_TestTable with 2,000,000 rows whose ids run from 1 to 2000000.
-- One set-based INSERT replaces the original WHILE loop, which issued two
-- million single-row statements (each its own implicit transaction).
SET IDENTITY_INSERT tb_TestTable ON;

-- Row source: the system column catalog cross-joined with itself is used only
-- as a convenient supply of rows to number; the catalog values are not used.
WITH numbered AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n
    FROM sys.all_columns AS a
    CROSS JOIN sys.all_columns AS b
)
INSERT INTO tb_TestTable (id, userName, userPWD, userEmail)
SELECT n, N'admin', N'admin888', N'lli0077@yahoo.com.cn'
FROM numbered
WHERE n <= 2000000;

-- Guard: fail loudly if the row source turned out to be too small.
IF @@ROWCOUNT <> 2000000
    RAISERROR('Expected 2000000 rows to be inserted into tb_TestTable.', 16, 1);

SET IDENTITY_INSERT tb_TestTable OFF;

我首先写了五个常用存储过程：

1，利用select top 和select not in进行分页，具体代码如下：

1

-- Paging with SELECT TOP + NOT IN.
-- Returns one page of tb_TestTable ordered by id, followed by a one-row result
-- set holding the elapsed time in milliseconds.
--   @pageIndex  zero-based page index (0 returns the first page)
--   @pageSize   number of rows per page
-- TOP accepts a parenthesised variable on SQL Server 2005, so the query is
-- static instead of being assembled as a string and run through EXECUTE.
CREATE PROCEDURE proc_paged_with_notin
(
    @pageIndex INT,
    @pageSize  INT
)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @timediff DATETIME;  -- start time of the measurement
    DECLARE @skip INT;           -- rows that precede the requested page

    SELECT @timediff = GETDATE(),
           @skip     = @pageSize * @pageIndex;

    -- Exclude the ids of all preceding pages, then take the next @pageSize rows.
    SELECT TOP (@pageSize)
           id, userName, userPWD, userEmail
    FROM tb_TestTable
    WHERE id NOT IN (SELECT TOP (@skip) id
                     FROM tb_TestTable
                     ORDER BY id ASC)
    ORDER BY id;

    SELECT DATEDIFF(ms, @timediff, GETDATE()) AS 耗时;

    SET NOCOUNT OFF;
END

2,利用select top 和 select max(列键)

-- Paging with SELECT TOP + SELECT MAX(key).
-- Returns one page of tb_TestTable ordered by id, followed by a one-row result
-- set holding the elapsed time in milliseconds.
--   @pageIndex  zero-based page index (0 returns the first page)
--   @pageSize   number of rows per page
CREATE PROCEDURE proc_paged_with_selectMax
(
    @pageIndex INT,
    @pageSize  INT
)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @timediff DATETIME;  -- start time of the measurement
    DECLARE @skip INT;           -- rows that precede the requested page

    SELECT @timediff = GETDATE(),
           @skip     = @pageSize * @pageIndex;

    IF @skip = 0
    BEGIN
        -- First page: nothing to skip. Without this branch MAX(id) over an
        -- empty TOP (0) set is NULL and "id > NULL" would return no rows.
        SELECT TOP (@pageSize)
               id, userName, userPWD, userEmail
        FROM tb_TestTable
        ORDER BY id;
    END
    ELSE
    BEGIN
        -- Find the largest id on the preceding pages and read the rows after it.
        SELECT TOP (@pageSize)
               id, userName, userPWD, userEmail
        FROM tb_TestTable
        WHERE id > (SELECT MAX(id)
                    FROM (SELECT TOP (@skip) id
                          FROM tb_TestTable
                          ORDER BY id) AS TempTable)
        ORDER BY id;
    END

    SELECT DATEDIFF(ms, @timediff, GETDATE()) AS 耗时;

    SET NOCOUNT OFF;
END

3,利用select top和中间变量--此方法因网上有人说效果最佳，所以贴出来一同测试

1

-- Paging with SELECT TOP + an intermediate variable that captures the last id
-- of the preceding pages.
-- Returns one page of tb_TestTable ordered by id, followed by a one-row result
-- set holding the elapsed time in milliseconds.
--   @pageIndex  zero-based page index (0 returns the first page)
--   @pageSize   number of rows per page
CREATE PROCEDURE proc_paged_with_Midvar
(
    @pageIndex INT,
    @pageSize  INT
)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @count    INT;       -- rows visited so far
    DECLARE @ID       INT;       -- id of the last row before the requested page
    DECLARE @timediff DATETIME;  -- start time of the measurement

    SELECT @count = 0, @ID = 0, @timediff = GETDATE();

    -- Visit every row, remembering the id of row number @pageSize * @pageIndex.
    -- @ID stays 0 for the first page, which assumes all ids are positive
    -- (true for the IDENTITY(1,1) column of tb_TestTable).
    -- NOTE(review): this relies on the per-row variable assignment following
    -- the ORDER BY; confirm that SQL Server guarantees this before depending on it.
    SELECT @count = @count + 1,
           @ID    = CASE WHEN @count <= @pageSize * @pageIndex THEN id ELSE @ID END
    FROM tb_TestTable
    ORDER BY id;

    -- ORDER BY is required here: TOP without it may return any @pageSize rows
    -- with id > @ID rather than the next page.
    SELECT TOP (@pageSize)
           id, userName, userPWD, userEmail
    FROM tb_TestTable
    WHERE id > @ID
    ORDER BY id;

    SELECT DATEDIFF(ms, @timediff, GETDATE()) AS 耗时;

    SET NOCOUNT OFF;
END
20

4,利用Row_number() 此方法为SQL server 2005中新的方法,利用Row_number()给数据行加上索引

1

-- Paging with ROW_NUMBER() (available from SQL Server 2005).
-- Returns one page of tb_TestTable (plus the IDRank row number) ordered by id,
-- followed by a one-row result set holding the elapsed time in milliseconds.
--   @pageIndex  zero-based page index (0 returns the first page)
--   @pageSize   number of rows per page
CREATE PROCEDURE proc_paged_with_Rownumber
(
    @pageIndex INT,
    @pageSize  INT
)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @timediff DATETIME;  -- start time of the measurement
    SELECT @timediff = GETDATE();

    -- The page covers row numbers (skip, skip + @pageSize]. The upper bound is
    -- inclusive; a strict "<" would drop the last row of every page.
    SELECT id, userName, userPWD, userEmail, IDRank
    FROM (SELECT id, userName, userPWD, userEmail,
                 ROW_NUMBER() OVER (ORDER BY id ASC) AS IDRank
          FROM tb_TestTable) AS IDWithRowNumber
    WHERE IDRank >  @pageSize * @pageIndex
      AND IDRank <= @pageSize * (@pageIndex + 1)
    ORDER BY IDRank;

    SELECT DATEDIFF(ms, @timediff, GETDATE()) AS 耗时;

    SET NOCOUNT OFF;
END
15

5，利用公用表表达式（CTE，下文简称“临时表”）及Row_number

1

-- Paging with a common table expression that labels every row with its page
-- number, computed from ROW_NUMBER().
-- Returns one page of tb_TestTable (preceded by the page_num column) ordered
-- by id, followed by a one-row result set holding the elapsed time in
-- milliseconds.
--   @pageIndex  zero-based page index (0 returns the first page)
--   @pageSize   number of rows per page
CREATE PROCEDURE proc_CTE
(
    @pageIndex INT,
    @pageSize  INT
)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @datediff DATETIME;  -- start time of the measurement
    SELECT @datediff = GETDATE();

    -- page_num = (row number - 1) / @pageSize using integer division, so rows
    -- 1..@pageSize are page 0, the next @pageSize rows are page 1, and so on.
    -- The original divided the raw row number, which shifted every page by one
    -- row and left page 0 one row short.
    WITH Table_CTE AS
    (
        SELECT (ROW_NUMBER() OVER (ORDER BY id ASC) - 1) / @pageSize AS page_num,
               id, userName, userPWD, userEmail
        FROM tb_TestTable
    )
    SELECT page_num, id, userName, userPWD, userEmail
    FROM Table_CTE
    WHERE page_num = @pageIndex
    ORDER BY id;

    SELECT DATEDIFF(ms, @datediff, GETDATE()) AS 耗时;

    SET NOCOUNT OFF;
END
22

OK，至此，存储过程创建完毕。我们在每页10条数据的情况下，分别对第2页、第1000页、第10000页、第100000页、第199999页进行测试；耗时单位为ms，每页测试5次取其平均值。

存过	第2页耗时	第1000页耗时	第10000页耗时	第100000页耗时	第199999页耗时	效率排行
1用not in	0ms	16ms	47ms	475ms	953ms	3
2用select max	5ms	16ms	35ms	325ms	623ms	1
3中间变量	966ms	970ms	960ms	945ms	933ms	5
4row_number	0ms	0ms	34ms	365ms	710ms	2
5临时表	780ms	796ms	798ms	780ms	805ms	4

测试结果显示：select max >row_number>not in>临时表>中间变量

于是我对效率最高的select max方法用2分法进行了扩展，代码取自互联网，我修改了ASC排序时取不到值的BUG，测试结果:

2分法

156ms

180ms

470ms

156ms

从测试结果来看，使用2分法确实可以提高效率并使效率更为稳定。我又增加了第159999页的测试，用时仅296ms，效果相当不错！

下面是2分法使用select max的代码，已相当完善。

1

--/*-----存储过程分页处理孙伟 2005-03-28创建 -------*/
2

--/*-----存储过程分页处理浪尘 2008-9-1修改----------*/
3

--/*----- 对数据进行了2分处理使查询前半部分数据与查询后半部分数据性能相同 -------*/
4

alter PROCEDURE proc_paged_2part_selectMax
6

(
7

@tblName nvarchar(200), ----要显示的表或多个表的连接
8

@fldName nvarchar(500) = '*', ----要显示的字段列表
9

@pageSize int = 10, ----每页显示的记录个数
10

@page int = 1, ----要显示那一页的记录
11

@fldSort nvarchar(200) = null, ----排序字段列表或条件
12

@Sort bit = 0, ----排序方法，0为升序，1为降序(如果是多字段排列Sort指代最后一个排序字段的排列顺序(最后一个排序字段不加排序标记)--程序传参如：' SortA Asc,SortB Desc,SortC ')
13

@strCondition nvarchar(1000) = null, ----查询条件,不需where
14

@ID nvarchar(150), ----主表的主键
15

@Dist bit = 0, ----是否添加查询字段的 DISTINCT 默认0不添加/1添加
16

@pageCount int = 1 output, ----查询结果分页后的总页数
17

@Counts int = 1 output ----查询到的记录数
18

)
19

AS
20

SET NOCOUNT ON
21

Declare @sqlTmp nvarchar(1000) ----存放动态生成的SQL语句
22

Declare @strTmp nvarchar(1000) ----存放取得查询结果总数的查询语句
23

Declare @strID nvarchar(1000) ----存放取得查询开头或结尾ID的查询语句
24top

分享到：

GTK的.NET的函数库 GTK# | sql2000和sql2005分页存储过程

2011-04-18 18:29
浏览 1011
评论(0)
分类:Web前端
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论