sql的

liliang_xf

浏览: 64467 次
性别:
来自: 湖北

最近访客更多访客>>

cxm0714

娃儿001

稍等劫色

dwinyu

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

SQL SQL Server Go SUN 数据结构

SQL查询重复数据和清除重复数据[转]

2009-08-16 21:53

有例表：emp

emp_no   name    age
    001           Tom      17
    002           Sun       14
    003           Tom      15
    004           Tom      16

要求：

列出所有名字重复的人的记录

(1)最直观的思路：要知道所有名字有重复人资料，首先必须知道哪个名字重复了：

select name from emp group by name having count(*)>1

所有名字重复人的记录是:

select * from emp
where name in (select name from emp group by name having count(*)>1)

(2)稍微再聪明一点，就会想到，如果把每条记录的名字都和原表进行比较，名字与这条记录相同的人数大于1（即包含自身在内至少有2个人）的就是合格的，就有

select * from emp where (select count(*) from emp e where e.name=emp.name) >1

--注意一下这个>1，想下如果是 =1，如果是 =2 如果是>2 如果 e 是另外一张表而且是=0那结果就更好玩了:)

这个过程是在判断工号为001的人的时候先取得 001的名字（emp.name）然后和原表的名字进行比较 e.name

注意e是emp的一个别名。

再稍微想得多一点，就会想到，如果存在另外一个名字相同、但工号与他/她不同的人，那么这条记录符合要求：

-- Keep a row when some other row (one with a different emp_no) carries the same name.
select cur.*
from emp as cur
where exists (
    select 1
    from emp as other
    where other.name = cur.name
      and other.emp_no <> cur.emp_no
)

此思路的join写法：

-- Self-join: pair every row with each *other* row that has the same name.
-- A name that occurs n times gives every one of its rows n-1 partners, so
-- without DISTINCT each such row is returned n-1 times (with the sample data
-- every Tom row would come back twice). DISTINCT lists each employee once.
select distinct cur.*
from emp as cur
inner join emp as other
    on cur.name = other.name
   and cur.emp_no <> other.emp_no
/* The same query in the older comma-join notation, which the original author
   found easier to read:
select distinct emp.* from emp, emp e
where emp.name = e.name and emp.emp_no <> e.emp_no
*/
b、有例表：emp
name     age
Tom       16
Sun        14
Tom       16
Tom       16

----------------------------------------------------清除重复----------------------------------------------------
过滤掉所有多余的重复记录
(1)我们知道distinct、group by 可以过滤重复，于是就有最直观的

select distinct * from emp 或 select name,age from emp group by name,age

获得需要的数据，如果可以使用临时表就有解法：

-- Rebuild emp from its distinct rows by way of a temporary table.
-- The delete and the re-insert run in one transaction so that a failure in
-- between cannot leave emp empty; xact_abort makes any run-time error roll
-- the whole transaction back.
set xact_abort on
begin transaction
    select distinct name, age
    into #tmp
    from emp

    -- Intentionally no WHERE clause: the whole table is replaced.
    delete from emp

    insert into emp (name, age)
    select name, age
    from #tmp
commit transaction

-- The temporary copy is no longer needed.
drop table #tmp

(2)但是如果不可以使用临时表，那该怎么办？
我们观察到我们没办法区分数据（物理位置不一样，对 SQL Server来说没有任何区别），思路自然是想办法把数据区分出来了，既然现在的所有的列都没办法区分数据，唯一的办法就是再加个列让它区分出来，加什么列好？最佳选择是identity列：

alter table emp add chk int identity(1,1)

表示例：

name   age   chk
    Tom     16     1
    Sun      14     2
    Tom     16     3
    Tom     16     4

重复记录可以表示为：

-- A row is a duplicate when more than one row shares all of its data columns
-- (name and age). Matching on name alone would also flag people who merely
-- share a name but are different records.
select * from emp where (select count(*) from emp e where e.name=emp.name and e.age=emp.age)>1

要删除的是：

-- For each row, count the identical rows (same name and age) whose chk is not
-- smaller than its own. The count includes the row itself, so it exceeds 1 for
-- every copy except the one with the highest chk; those copies are deleted and
-- exactly one row per (name, age) survives. Comparing on name alone would also
-- delete rows that share a name but differ in age.
delete   from   emp
    where (select   count(*)   from   emp   e
           where   e.name=emp.name   and   e.age=emp.age   and   e.chk>=emp.chk)>1

再把添加的列删掉，出现结果。

alter   table   emp   drop   column   chk

(3)另一个思路：
视图

-- Smallest chk within every group of identical rows that occurs more than once.
-- The group is the full data row (name, age), not just the name, so that two
-- different people with the same name are not treated as duplicates.
select min(chk) from emp group by name, age having count(*) >1

获得有重复的记录chk最小的值,于是可以

-- Keep only the row with the smallest chk in each (name, age) group and delete
-- the rest. Grouping by name alone would also remove rows that share a name
-- but differ in age, which are not duplicates.
delete from emp where chk not in (select min(chk) from emp group by name, age)

写成join的形式也可以:

(1)有例表：emp

emp_no    name    age
    001            Tom      17
    002            Sun       14
    003            Tom      15
    004            Tom      16

◆要求生成序列号
(1)最简单的方法，根据b问题的解法：

alter table emp add chk int identity(1,1) 或
select *,identity(int,1,1) chk into #tmp from emp

◆如果需要控制顺序怎么办？

-- Copy emp into #tmp with a sequence number assigned in age order.
-- ROW_NUMBER() takes the ordering in its own OVER clause, so the numbering does
-- not depend on the "top N ... order by" workaround (which also silently drops
-- rows beyond the 100000th). emp_no breaks ties between equal ages so the
-- numbering is deterministic. Requires SQL Server 2005 or later.
select *, cast(row_number() over (order by age, emp_no) as int) as chk
into #tmp
from emp

(2) 假如不可以更改表结构，怎么办？
如果不可以唯一区分每条记录是没有办法的，在可以唯一区分每条记录的时候,可以使用a 中的count的思路解决这个问题

-- Sequence number without changing the table: a row's number is the count of
-- rows whose emp_no is not greater than its own. The computed column is given
-- a name (seq_no) so it can be referenced by callers and reused in ORDER BY
-- instead of repeating the correlated subquery.
select
    emp.*,
    (select count(*) from emp e where e.emp_no <= emp.emp_no) as seq_no
from emp
order by seq_no

分享到：

手动添加依赖的jar文件到maven仓库 | webim

2010-01-11 13:32
浏览 1073
评论(1)
分类:企业架构
查看更多

1 楼 liliang_xf 2010-01-11

SQL子查询,连接查询,数据汇总,GROUP BY，ORDER BY子句的使用
2009-04-14 22:49
/* 1. Using subqueries */

/* (1) List the employees who work in the Finance department ('财务部'). */
select emp.*
from employees as emp
where exists (
    select 1
    from departments as dept
    where dept.departmentid = emp.departmentid
      and dept.departmentname = '财务部'
)
go

/* (2) List the employees whose income is below 2500. */
select emp.*
from employees as emp
where exists (
    select 1
    from salary as sal
    where sal.employeeid = emp.employeeid
      and sal.income < 2500
)
go

/* The same filter written as a join; returns the name plus the salary row. */
select name, sal.*
from employees as emp
inner join salary as sal
    on emp.employeeid = sal.employeeid
where sal.income < 2500
go

/* (3) Finance ('财务部') employees who are at least as old as every R&D ('研发部')
       employee, i.e. whose birthday is not later than any R&D birthday. */
select fin.*
from employees as fin
where fin.departmentid in (
        select departmentid
        from departments
        where departmentname = '财务部'
    )
  and fin.birthday <= all (   /* "<=" is the standard spelling of T-SQL "!>" */
        select rd.birthday
        from employees as rd
        where rd.departmentid in (
            select departmentid
            from departments
            where departmentname = '研发部'
        )
    )
go
/* Show name, birthday and department name of the Finance and R&D staff. */
select name, birthday, departmentname
from employees as emp
inner join departments as dept
    on emp.departmentid = dept.departmentid
where dept.departmentname = '研发部'
   or dept.departmentname = '财务部'
go
/* The same result, with the two department names given as an IN list. */
select name, birthday, departmentname
from employees as emp
inner join departments as dept
    on emp.departmentid = dept.departmentid
where dept.departmentname in ('研发部', '财务部')
go

/* (4) Employees whose income is higher than that of every Finance ('财务部')
       employee. Read from the inside out: Finance department id -> its
       employees -> their incomes -> salary rows above all of those incomes
       -> the matching employees. */
select *
from employees
where employeeid in
    (select employeeid
     from salary
     where income > all
        (select income
         from salary
         where employeeid in
            (select employeeid
             from employees
             where departmentid in
                (select departmentid
                 from departments
                 /* exact match with "=", like every other department filter
                    in this script; LIKE without a wildcard adds nothing */
                 where departmentname = '财务部'))))
go

/* (5) Employees who are older than every R&D ('研发部') employee, i.e. whose
       birthday is earlier than all R&D birthdays. */
select emp.*
from employees as emp
where emp.birthday < all (
    select rd.birthday
    from employees as rd
    where rd.departmentid in (
        select departmentid
        from departments
        where departmentname = '研发部'
    )
)
go

/* 2. Using join queries */
/* (1) Every employee together with his or her salary row. */
select emp.*, sal.*
from employees as emp
inner join salary as sal
    on emp.employeeid = sal.employeeid
go

/* (2) Every employee together with the department he or she works in. */
select emp.*, dept.*
from employees as emp
inner join departments as dept
    on emp.departmentid = dept.departmentid
go

/* (3) Name and salary details of Finance ('财务部') employees earning more than 2200. */
select name, sal.*
from employees as emp
inner join salary as sal
    on emp.employeeid = sal.employeeid
inner join departments as dept
    on emp.departmentid = dept.departmentid
where dept.departmentname = '财务部'
  and sal.income > 2200
go

/* (4) R&D ('研发部') employees born before 1966, with their salary details. */
select emp.*, sal.*
from employees as emp
inner join salary as sal
    on emp.employeeid = sal.employeeid
inner join departments as dept
    on emp.departmentid = dept.departmentid
where dept.departmentname = '研发部'
  and emp.birthday < '1966' /* note: 1966 must be written inside single quotes */
go

/* 3. Aggregating data */

/* (1) Average income of the Finance ('财务部') employees. */
select avg(income) as '财务部平均收入'
from salary
where employeeid in (
    select emp.employeeid
    from employees as emp
    inner join departments as dept
        on emp.departmentid = dept.departmentid
    where dept.departmentname = '财务部'
)
go

/* (2) Highest and lowest income among the Finance ('财务部') employees. */
select
    max(income) as '最高收入',
    min(income) as '最低收入'
from salary
where employeeid in (
    select emp.employeeid
    from employees as emp
    inner join departments as dept
        on emp.departmentid = dept.departmentid
    where dept.departmentname = '财务部'
)
go

/* (3) Average actual income (income minus outcome) of the Finance ('财务部') employees. */
select avg(income - outcome) as '平均实际收入'
from salary
where employeeid in (
    select emp.employeeid
    from employees as emp
    inner join departments as dept
        on emp.departmentid = dept.departmentid
    where dept.departmentname = '财务部'
)
go

/* (4) Highest and lowest actual income (income minus outcome) among the
       Finance ('财务部') employees.
       The column labels say "actual income" (实际收入): the original labels were
       copied from the plain-income query and misdescribed the values. */
select max(income-outcome)as'最高实际收入',min(income-outcome)as'最低实际收入'
from salary
where employeeid in
(select employeeid
from employees
where departmentid in
(select departmentid
from departments
where departmentname='财务部'))
go

/* (5) Number of employees in the Finance ('财务部') department. */
select count(emp.employeeid) as '财务部总人数'
from employees as emp
where exists (
    select 1
    from departments as dept
    where dept.departmentid = emp.departmentid
      and dept.departmentname = '财务部'
)
go

/* (6) Number of Finance ('财务部') employees whose income is above 2500.
       employeeid exists in both employees and salary, so it must be qualified;
       an unqualified reference in this join is ambiguous. income is compared
       with the number 2500, as in the other income filters of this script,
       rather than with the string '2500'. */
select count(employees.employeeid) as '财务部收入在2500元以上总人数'
from employees
inner join salary
    on employees.employeeid = salary.employeeid
inner join departments
    on employees.departmentid = departments.departmentid
where salary.income > 2500
  and departments.departmentname = '财务部'
go

/* The same count written with subqueries: an employee is counted when the
   department is Finance ('财务部') AND a salary row with income above 2500
   exists. The original statement nested "departmentname='财务部' in (select
   departmentname='财务部' ...)", which is not valid T-SQL; the two conditions
   are independent filters on employees and are combined with AND. */
select count(employeeid) as '财务部收入在2500元以上总人数'
from employees
where departmentid in
    (select departmentid
     from departments
     where departmentname = '财务部')
  and employeeid in
    (select employeeid
     from salary
     where income > 2500)
go

/* 4. Using the GROUP BY and ORDER BY clauses */

/* (1) Number of employees in each department.
       departmentid is returned next to the count; without it the result is a
       list of numbers that cannot be attributed to a department. */
select departmentid, count(employeeid) as '各部门的雇员人数 '
from employees
group by departmentid
go

/* (2) Per department, the number of employees whose income is above 2000.
       departmentid is returned next to the count so each figure can be
       attributed to its department. income is compared with the number 2000,
       as in the other income filters of this script, not the string '2000'. */
select departmentid, count(employeeid) as '各部门入在2000元以上的人数 '
from employees
where employeeid in
    (select employeeid
     from salary
     where income > 2000)
group by departmentid
go

/* (3) All employees with their salary rows, ordered from lowest to highest income. */
select emp.*, sal.*
from employees as emp
inner join salary as sal
    on emp.employeeid = sal.employeeid
order by sal.income asc
go

/* (4) All employees ordered by date of birth. */
select emp.*
from employees as emp
order by emp.birthday asc /* ascending: earliest birthday (oldest employee) first */
go

一、ORDER BY是一个可选的子句，它允许你根据指定要order by的列来以上升或者下降的顺序来显示查询的结果，它不需要查询结果中出现order by的栏位.
更改Order by里的栏位只会影响查询结果的顺序,而不影响查询出的记录总数,和每条记录的内容.

二、group by 从英文里理解就是分组。它通常与“聚合函数”配合使用（语法上并不强制：不带聚合函数时，效果相当于按分组字段去重），使用时至少需要一个分组标志字段。

什么是“聚合函数”？
像sum()、count()、avg()等都是“聚合函数”
使用group by 的目的就是要将数据分类汇总。

一般如：
    select 单位名称,count(职工id),sum(职工工资) from [某表]
    group by 单位名称
    这样的运行结果就是以“单位名称”为分类标志统计各单位的职工人数和工资总额。

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

sql的

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

sql的

评论

发表评论

相关推荐

最近访客更多访客>>