Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix trigger_log_entries_aggr #33

Open
matyaskopp opened this issue Nov 9, 2021 · 5 comments
Open

fix trigger_log_entries_aggr #33

matyaskopp opened this issue Nov 9, 2021 · 5 comments

Comments

@matyaskopp
Copy link
Member

this trigger:

CREATE OR REPLACE FUNCTION trigger_log_entries_aggr()
RETURNS TRIGGER AS
$$
declare
  period RECORD;
  period_start TIMESTAMP;
  period_end TIMESTAMP;
  endpoint INTEGER;
begin
  IF tg_op='INSERT' THEN
    -- loop period levels
    FOR period IN
      SELECT unnest(enum_range(null::period_levels)) as period
    LOOP
      SELECT INTO period_start,period_end p.period_start, p.period_end
        FROM log_aggr_period(new.time_local, period.period::period_levels) p;
      FOR endpoint IN
        SELECT endpoint_id FROM user_endpoints WHERE ip = new.remote_addr AND is_active IS TRUE -- TODO test start_date
      LOOP
        PERFORM log_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, endpoint, new.service_id, 1, new.body_bytes_sent);
        PERFORM log_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, endpoint, NULL, 1, new.body_bytes_sent);
      END LOOP;
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, new.service_id);
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, NULL);
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, new.service_id);
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, NULL);
    END LOOP;

    RETURN new;
  ELSE
    RETURN null;
  END IF;
END;
$$
LANGUAGE plpgsql;

has been temporarily replaced with:

CREATE OR REPLACE FUNCTION trigger_log_entries_aggr()
RETURNS TRIGGER AS
$$
declare
  period RECORD;
  period_start TIMESTAMP;
  period_end TIMESTAMP;
  endpoint INTEGER;
begin
  IF tg_op='INSERT' THEN
    -- loop period levels
    FOR period IN
      SELECT unnest(enum_range(null::period_levels)) as period
    LOOP
      SELECT INTO period_start,period_end p.period_start, p.period_end
        FROM log_aggr_period(new.time_local, period.period::period_levels) p;
--      FOR endpoint IN
--        SELECT endpoint_id FROM user_endpoints WHERE ip = new.remote_addr AND is_active IS TRUE -- TODO test start_date
--      LOOP
--        PERFORM log_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, endpoint, new.service_id, 1, new.body_bytes_sent);
--        PERFORM log_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, endpoint, NULL, 1, new.body_bytes_sent);
--      END LOOP;
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, new.service_id);
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, NULL);
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, new.service_id);
      PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, NULL);
    END LOOP;

    RETURN new;
  ELSE
    RETURN null;
  END IF;
END;
$$
LANGUAGE plpgsql;
@matyaskopp
Copy link
Member Author

matyaskopp commented Nov 9, 2021

This has no effect on the performance.
The loading log files performance is 10.5 valid records per second.

@matyaskopp
Copy link
Member Author

Doing only hour period aggregation !!!

CREATE OR REPLACE FUNCTION trigger_log_entries_aggr()
RETURNS TRIGGER AS
$$
declare
  period RECORD;
  period_start TIMESTAMP;
  period_end TIMESTAMP;
  endpoint INTEGER;

begin
  IF tg_op='INSERT' THEN
    -- loop period levels
--    FOR period IN
--      SELECT unnest(enum_range(null::period_levels)) as period
--    LOOP
      SELECT INTO period_start,period_end p.period_start, p.period_end
--        FROM log_aggr_period(new.time_local, period.period::period_levels) p;
        FROM log_aggr_period(new.time_local, 'hour'::period_levels) p; -- hour period hardcoded
--      FOR endpoint IN
--        SELECT endpoint_id FROM user_endpoints WHERE ip = new.remote_addr AND is_active IS TRUE -- TODO test start_date
--      LOOP
--        PERFORM log_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, endpoint, new.service_id, 1, new.body_bytes_sent);
--        PERFORM log_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, endpoint, NULL, 1, new.body_bytes_sent);
--      END LOOP;
      -- PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, new.service_id);
      PERFORM log_ip_aggr_new_entry('hour'::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, new.service_id); -- hour period hardcoded
      -- PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, NULL);
      PERFORM log_ip_aggr_new_entry('hour'::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, NULL); -- hour period hardcoded
      -- PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, new.service_id);
      PERFORM log_ip_aggr_new_entry('hour'::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, new.service_id); -- hour period hardcoded
      -- PERFORM log_ip_aggr_new_entry(period.period::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, NULL);
      PERFORM log_ip_aggr_new_entry('hour'::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, NULL, NULL); -- hour period hardcoded
--    END LOOP;
    RETURN new;
  ELSE
    RETURN null;
  END IF;
END;
$$
LANGUAGE plpgsql;

@matyaskopp
Copy link
Member Author

The result of #33 (comment) is

  • 10times slower
  • using 4 cores instead of one

@matyaskopp
Copy link
Member Author

Aggregate only full pair (IP, service)

  • approx 1.2 records/second (on 12 minutes runtime sample)
  • 4 CPU cores used
CREATE OR REPLACE FUNCTION trigger_log_entries_aggr()
RETURNS TRIGGER AS
$$
declare
  period RECORD;
  period_start TIMESTAMP;
  period_end TIMESTAMP;
  endpoint INTEGER;

begin
  IF tg_op='INSERT' THEN
      SELECT INTO period_start,period_end p.period_start, p.period_end
        FROM log_aggr_period(new.time_local, 'hour'::period_levels) p; -- hour period hardcoded
      PERFORM log_ip_aggr_new_entry('hour'::period_levels, period_start, period_end, new.unit, new.body_bytes_sent, new.remote_addr, new.service_id); -- hour period hardcoded
    RETURN new;
  ELSE
    RETURN null;
  END IF;
END;
$$
LANGUAGE plpgsql;

@matyaskopp
Copy link
Member Author

Temporarily dropped whole trigger -> no improvement

DROP TRIGGER log_files_lines_read_aggr ON log_file_entries RESTRICT;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant