日期:2014-05-16  浏览次数:20769 次

group by 分组时 or 的问题,疑难杂症,坐等牛人。。
SQL code

create  table ttt
         (
          id number(4) primary key,
          name varchar2(16) ,
          sex varchar2(2),
          age number(4)
         )
         
         insert into ttt values (1, 'a','男', 2);
         insert into ttt values (6, 'a','男', 4);
         insert into ttt values (3, 'a','男', 3);
         insert into ttt values (8, 'a','女', 4);
         
         insert into ttt values (7, 'b','女', 4);
         insert into ttt values (2, 'b','女', 2);
         insert into ttt values (9, 'b','女', 4);
         
         insert into ttt values (4, 'c','女', 4);
         insert into ttt values (5, 'e','女', 5);


要求:统计数据库中重复数据的条数(有可能不止重复两条)
重复条件:(name相同,sex相同) 或者 (name相同,age相同),去掉统计时的重复记录(如name相同,sex相同,age相同的记录,不能重复统计)
正确答案:7条
求用sql查询实现
已知算法:
1.
SQL code

--这个算法最简单,但是真实数据量很大,这个效率太差
select count(distinct t.id) from ttt t,
         (select name,0 age,sex from ttt group by name,sex having count(1)>1 union
         select name,age,'' sex from ttt group by name,age having count(1)>1) m where 
         t.name=m.name and (t.age=m.age or t.sex=m.sex);


2.
SQL code

--此种方式不能去除重复记录,查出来为10条
select sum(count(1)) coun from ttt group by grouping sets((name,sex),(name,age)) having count(1)>1;


等sql牛人来解决。。。。。。

------解决方案--------------------
--查出所有重复记录
select name,sex,age,id from(
select name,sex,age,id from ttt where exists (select name,sex from
(select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a
where ttt.name=a.name and ttt.sex=a.sex))a
union
( select name,sex,age,id from ttt where exists (select name,age from
(select name,age,count(name) from ttt group by name,age having count(name)>=2)a
where ttt.name=a.name and ttt.age=a.age))
------解决方案--------------------
--查询重复条数
select count(*) from (
select name,sex,age,id from(
select name,sex,age,id from ttt where exists (select name,sex from
(select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a
where ttt.name=a.name and ttt.sex=a.sex))a
union
( select name,sex,age,id from ttt where exists (select name,age from
(select name,age,count(name) from ttt group by name,age having count(name)>=2)a
where ttt.name=a.name and ttt.age=a.age))
)

------解决方案--------------------
Have a try.
SQL code
with a as
   (select id, count(*) over(partition by t.name, t.sex) count from ttt t),
  b as
   (select id, count(*) over(partition by t.name, t.age) count from ttt t)
  select count(id)
    from (select a.id
            from a
           where a.count >= 2
          union
          select b.id from b where b.count >= 2)

------解决方案--------------------
在原来基础上还可以继续优化,代码如下:
SQL code
select count(*)
  from (select id,
               count(*) over(partition by t.name, t.sex) count1,
               count(*) over(partition by t.name, t.age) count2
          from ttt t) a
 where a.count1 > 1
    or a.count2 > 1

------解决方案--------------------
探讨

引用:

在原来基础上还可以继续优化,代码如下:
SQL code
select count(*)