数据库去除重复记录思路一
大数据量的更新,最好不要用update,可以这么弄:
步骤1:
create table us_publiccomment_01 nologging as select * from us_publiccomment where rowid in(
select row_id
from (select t.rowid row_id,
row_number() over(partition by t.msisdn,t.contentid , t.commentary order by t.createtime) rn
from us_publiccomment t)
where rn = 1
);
步骤2(软删除重复记录,硬删除忽略):
alter table us_publiccomment_01 nologging;
insert into us_publiccomment_01 select msisdn,contentid,commentary,...,'2'(publishstatus),... from us_publiccomment where rowid in(
select row_id
from (select t.rowid row_id,
row_number() over(partition by t.msisdn,t.contentid , t.commentary order by t.createtime) rn
from us_publiccomment t)
where rn > 1
);
commit;
alter table us_publiccomment_01 logging;
步骤3:
rename us_publiccomment to us_publiccomment_bak;
rename us_publiccomment_01 to us_publiccomment;
步骤4:给新表加约束、主键、索引
如:
alter table us_publiccomment modify msisdn not null;
alter table us_publiccomment modify msisdn default '11111111111';
--
alter table us_publiccomment add constraint pk_us_publiccomment primary key (msisdn) using index tablespace tbs_mread_score_idx;
步骤5:
drop table us_publiccomment_bak;