日期:2014-05-17  浏览次数:20484 次

按要求数据配对?
SQL code

/*
有一张表(大约200万条记录)。为方便测试,剔除无关信息,随机生成10000行记录,保留3列,记为:
test(usrid,value,u_type),其中usrid唯一,value在1000范围以内,u_type为‘Yes'或‘No’。

--要求:选择表中value值相同但u_type不同的记录,将其usrid成对找出来。如果有1对多,多对1,
--      多对多情形,则任选其一,使其成为1对1.
--例如:如果原始表为:
usrid    value    u_type
1          1        Yes 
2          34       No
4          86       No
5          34       No
6          7        Yes
8          1        Yes
9          1        No
3          10       Yes
89         10       Yes
78         7        No
14         2        No
66         2        Yes
102        2        No
708        8        Yes
84         8        No
99         8        Yes
182        8        No
最终表为(2行):
Usrid_Yes    Usrid_No
1              9
6              78
66             14
99             84
这里像value为1的记录,u_type有2个Yes,1个No。属于多对1,那么任意挑一个Yes和No的记录,找出其usrid(1和9)。
value为2,8的记录属于1对多,多对多,做类似处理。

--说明一下,使用类似下面的cross join的方法,大数据量时,不太可行。
select 
    t1.usr ,
    t1.u_type,
    t2.usr ,
    t2.u_type
from test2 t1,test2 t2
where t1.value=t2.value and t1.u_type!=t2.u_type and t1.usr!=t2.usr
order by t1.usr,t2.usr
。
。
。
*/

--随机生成数据
if OBJECT_ID('test2') is not null
drop table test2
go

create table test2(usrid int,value int,u_type varchar(5))

declare @i int
set @i=1

while @i<=10000
begin
    insert into test2 values(@i,ABS(CHECKSUM(newid())%1000),ABS(CHECKSUM(newid()))%2)
    set @i=@i+1
end

update test2
set u_type=case u_type when 1 then 'Yes'
               when 0 then 'No' end from test2 

--select count(1) from test2 



------解决方案--------------------
SQL code

-->try
select a.usrid,a.value from
(select row_number() over(partition by value order by value) rn,* from test2)a
inner join 
(select value from test2 group by value having count(distinct u_type)>1) b
on a.value=b.value
where a.rn=1

------解决方案--------------------
with tt as (
select value 
from test 
group by value
having COUNT(distinct u_type)>1
)
select 
(select top 1 usrid from test t where t.value=tt.value and t.u_type='Yes') Usrid_Yes ,
(select top 1 usrid from test t where t.value=tt.value and t.u_type='No') Usrid_No 
from tt

------解决方案--------------------
SQL code

 with t as (
 select value,u_type,usrid,
 ROW_NUMBER() over (partition by value,u_type order by usrid) as rn
 from test2  
 ),tYes as(
 select * From t where u_type='Yes' and rn=1
 ) ,tNo as (
 select * From t where u_type='No' and rn=1
 )
 select a.usrid as Usrid_Yes,b.usrid as Usrid_No
 from tYes a join tNo b 
 on a.value=b.value   
 order by a.usrid