日期:2014-05-17 浏览次数:20627 次
/*
有一张表(大约200万条记录)。为方便测试,剔除无关信息,随机生成10000行记录,保留3列,记为:
test(usrid,value,u_type),其中usrid唯一,value在1000范围以内,u_type为‘Yes'或‘No’。
--要求:选择表中value值相同但u_type不同的记录,将其usrid成对找出来。如果有1对多,多对1,
-- 多对多情形,则任选其一,使其成为1对1.
--例如:如果原始表为:
usrid value u_type
1 1 Yes
2 34 No
4 86 No
5 34 No
6 7 Yes
8 1 Yes
9 1 No
3 10 Yes
89 10 Yes
78 7 No
14 2 No
66 2 Yes
102 2 No
708 8 Yes
84 8 No
99 8 Yes
182 8 No
最终表为(2行):
Usrid_Yes Usrid_No
1 9
6 78
66 14
99 84
这里像value为1的记录,u_type有2个Yes,1个No。属于多对1,那么任意挑一个Yes和No的记录,找出其usrid(1和9)。
value为2,8的记录属于1对多,多对多,做类似处理。
--说明一下,使用类似下面的cross join的方法,大数据量时,不太可行。
select
t1.usr ,
t1.u_type,
t2.usr ,
t2.u_type
from test2 t1,test2 t2
where t1.value=t2.value and t1.u_type!=t2.u_type and t1.usr!=t2.usr
order by t1.usr,t2.usr
。
。
。
*/
--随机生成数据
if OBJECT_ID('test2') is not null
drop table test2
go
create table test2(usrid int,value int,u_type varchar(5))
declare @i int
set @i=1
while @i<=10000
begin
insert into test2 values(@i,ABS(CHECKSUM(newid())%1000),ABS(CHECKSUM(newid()))%2)
set @i=@i+1
end
update test2
set u_type=case u_type when 1 then 'Yes'
when 0 then 'No' end from test2
--select count(1) from test2
-->try select a.usrid,a.value from (select row_number() over(partition by value order by value) rn,* from test2)a inner join (select value from test2 group by value having count(distinct u_type)>1) b on a.value=b.value where a.rn=1
------解决方案--------------------
with tt as (
select value
from test
group by value
having COUNT(distinct u_type)>1
)
select
(select top 1 usrid from test t where t.value=tt.value and t.u_type='Yes') Usrid_Yes ,
(select top 1 usrid from test t where t.value=tt.value and t.u_type='No') Usrid_No
from tt
------解决方案--------------------
with t as (
select value,u_type,usrid,
ROW_NUMBER() over (partition by value,u_type order by usrid) as rn
from test2
),tYes as(
select * From t where u_type='Yes' and rn=1
) ,tNo as (
select * From t where u_type='No' and rn=1
)
select a.usrid as Usrid_Yes,b.usrid as Usrid_No
from tYes a join tNo b
on a.value=b.value
order by a.usrid