Date: 2014-05-16

Dealing with an oversized online log table, part 1: batch deletion by rowid

         We have a real-time log table whose data volume has grown far too large (it is not a partitioned table). The table serves live traffic and cannot be taken offline, so the first step is to delete part of its data online. Because this is a production database and the business cannot be interrupted, the usual create-table-as-select / rename trick is not an option, so I fell back on the plain approach of deleting in batches by rowid.


1. First, create a stored procedure:
create or replace procedure delete_table_by_condition(table_name varchar2,
                                                      condition  varchar2) is
  type mycursor_type is ref cursor;
  mycursor mycursor_type;
  --cursor mycursor is
  -- SELECT ROWID FROM table_name WHERE condition order by rowid;
  type rowid_table_type is table of rowid index by pls_integer;
  v_rowid         rowid_table_type;
  v_sql_getrowid  varchar2(400);
  v_sql_del_table varchar2(400);
BEGIN
  v_sql_getrowid  := 'SELECT ROWID FROM ' || table_name || ' WHERE ' ||
                     condition || ' order by rowid ';
  v_sql_del_table := 'delete from ' || table_name || ' WHERE rowid = :1';
  dbms_output.put_line(v_sql_getrowid);
  open mycursor for v_sql_getrowid;
  loop
    fetch mycursor bulk collect -- fetch 5,000 rows per pass; each batch is deleted and committed
      into v_rowid limit 5000;
    exit when v_rowid.count = 0;
    forall i in v_rowid.first .. v_rowid.last
      execute immediate v_sql_del_table using v_rowid(i);
    commit;
  end loop;
  close mycursor;
END;
/
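Before wiring the procedure into a scheduler job, a quick manual run is useful. The call below is only an example: the ACCESSLOG table and the logtime condition mirror the job defined in step 2 and must be adapted to your own schema; serveroutput is enabled so the generated SELECT is printed.

set serveroutput on
begin
  -- example call: delete ACCESSLOG rows older than 7 days, 5,000 rows per commit
  delete_table_by_condition('ACCESSLOG',
                            'logtime < to_char(trunc(sysdate-7), ''yyyy-mm-dd hh24:mi:ss'')');
end;
/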

2. Next, create a job that runs the procedure at 5 a.m. every day.

declare
  JOBNAME varchar2(100) := 'JOB_delete_table';
  JOB_CNT int;
begin
  select count(*) into JOB_CNT from user_scheduler_jobs uj where upper(uj.JOB_NAME) = upper(JOBNAME);
  if JOB_CNT >= 1 then
    begin dbms_scheduler.drop_job(job_name => JOBNAME); end;
  end if;
end;

/

begin
  dbms_scheduler.create_job(job_name        => 'JOB_delete_table',
                            job_type        => 'PLSQL_BLOCK',
                            job_action      => 'begin delete_table_by_condition(''ACCESSLOG'',''logtime < to_char(trunc(sysdate-1),''''yyyy-mm-dd hh24:mi:ss'''')'');end;',
                            repeat_interval => 'FREQ=DAILY;BYHOUR=5;byminute=0',
                            enabled         => true);
end;

/

exec DBMS_SCHEDULER.enable('JOB_delete_table');
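To confirm the job was registered and see when it will run next, the scheduler's data dictionary view can be queried (standard DBMS_SCHEDULER metadata, nothing specific to this procedure; note that the job name is stored in upper case):

select job_name, enabled, state, next_run_date, repeat_interval
  from user_scheduler_jobs
 where job_name = 'JOB_DELETE_TABLE';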

3. After the deletions have run, check the table for fragmentation:
  SELECT table_name,
         ROUND (BLOCKS * 8192 / 1024 / 1024 , 2) "total_size(M)",
         ROUND (num_rows * AVG_ROW_LEN / 1024 / 1024 , 2) "used_size(M)",
         ROUND ( ( (BLOCKS * 8192 / 1024 / 1024 ) - (num_rows * AVG_ROW_LEN / 1024 / 1024 )), 2) "wasted_size(M)",
         ROUND (ROUND ( ( (BLOCKS * 8192 / 1024 / 1024 )- (num_rows * AVG_ROW_LEN / 1024 / 1024 )),2)/
                     ROUND (BLOCKS * 8192 / 1024 / 1024 , 2),2)* 100|| '%' wasted_percent
    FROM user_tables
   WHERE   ROUND (BLOCKS * 8192 / 1024 / 1024 , 2) <> 0
ORDER BY 2 desc;
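Note that blocks, num_rows and avg_row_len in this query come from the optimizer statistics, and the 8192 factor assumes an 8 KB block size. To avoid judging fragmentation from stale numbers, refresh the statistics first; the table name below is the example used throughout this article:

exec dbms_stats.gather_table_stats(user, 'ACCESSLOG');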

The table turned out to be quite heavily fragmented, so I decided to shrink it.

4. Shrink the space used by the table online to lower the high-water mark.
-- row movement must be enabled before a shrink, otherwise it fails with ORA-10636
alter table accesslog enable row movement;
alter table accesslog shrink space;
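On a busy table the shrink can also be split into two steps, so that the long row-compaction phase does not hold the blocking lock needed to move the high-water mark; only the short final step takes that lock:

alter table accesslog shrink space compact;  -- compact rows; DML can continue meanwhile
alter table accesslog shrink space;          -- quick final step: move the high-water mark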

5. Summary: the result is acceptable. Queries are a bit faster than before and disk space was reclaimed, but both the delete procedure and the table shrink take far too long.
In the longer term a partitioned table is still the way to go.
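For reference, here is a minimal sketch of that partitioned alternative, assuming Oracle 11g or later and assuming logtime is (or can become) a DATE column; the column list is invented for illustration and would have to match the real ACCESSLOG definition:

-- daily interval partitions keyed on logtime (assumed DATE)
create table accesslog_part (
  logtime  date not null,
  msg      varchar2(4000)
)
partition by range (logtime)
interval (numtodsinterval(1, 'DAY'))
(partition p_init values less than (date '2014-05-01'));

-- aging out a day of data then becomes a metadata-only operation instead of a huge DELETE:
-- alter table accesslog_part drop partition for (date '2014-05-15');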