From 2bf7309907f4bb458082ed647ea1f249ef5b6e6d Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Tue, 8 Aug 2023 13:11:21 +0100 Subject: [PATCH] Refactor to move route generator into its own file --- benchmarks/lobsters/benchmark.rb | 83 ++--------------- benchmarks/lobsters/route_generator.rb | 121 +++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 75 deletions(-) create mode 100644 benchmarks/lobsters/route_generator.rb diff --git a/benchmarks/lobsters/benchmark.rb b/benchmarks/lobsters/benchmark.rb index 6f783c93..28e02091 100644 --- a/benchmarks/lobsters/benchmark.rb +++ b/benchmarks/lobsters/benchmark.rb @@ -7,58 +7,14 @@ use_gemfile extra_setup_cmd: "bin/rails db:drop db:create && sqlite3 db/production.sqlite3 < db/faked_bench_data.sql" require_relative 'config/environment' +require_relative "route_generator" app = Rails.application +generator = RouteGenerator.new(app) +generator.do_login # We want to be logged in for all generated requests +generator.routes # Make sure routes have been pregenerated -# TODO: a wider variety of routes might better show megamorphism in call sites - -# Do we need to distinguish between e.g. banned and non-banned users? 
- -ROUTE_GROUPS = [ - { num: 50, method: :GET, routes: ["/u"] }, # Users tree, showing order of invitation - lots of view logic - { num: 50, method: :GET, routes: ["/active", "/newest", "/recent", "/hottest"] }, # Views of the stories by attributes - { num: 25, method: :GET, routes: ["/rss", "/privacy", "/about", "/settings"] }, # Less-common and less-interesting routes for variation - { num: 25, method: :GET, routes: ["/top?length=1d", "/top?length=1w", "/top?length=1y"] }, # Top stories by time - { num: 50, method: :GET, routes: ["/hidden", "/saved", "/upvoted/stories"] }, # These all required being logged in - - # Turned off flag_warning check for /threads -- to hard to port to SQLite - { num: 50, routes: ["/comments", "/upvoted/comments", "/threads", "/comments/:comment_id/reply"] }, - { num: 25, routes: ["/threads/:username", "/u/:username"] }, - - { num: 50, routes: ["/replies", "/replies/comments", "/replies/stories", "/replies/unread"] }, # replies#stories, replies#unread, replies#comments - - #{ num: 25, routes: ["/s/:story_id"] }, # need to check more into how stories work - e.g. story#show doesn't seem side-effect-free - - # /categories: - admin-only - #{ num: 50, routes: ["/stats"] }, # Stats gets a 500, needs more MySQL->SQLite porting - # Shouldn't add /404, because that returns status 404, not 200 - # No messages added during fake-data task, so skip messages controller - # The moderators controller isn't high-traffic, plus it has various MySQL time code that needs porting - skip it? 
- -] - -rng = Random.new(0x1be52551fc152997) - -db_ids = { - # Find appropriate model files - comment_id: Comment.all.pluck(:short_id), - #story_id: Story.all.pluck(:short_id), - username: User.all.pluck(:username), -} - -visiting_envs = [] -ROUTE_GROUPS.each do |group| - group[:num].times do - route = group[:routes].sample(random: rng) - if route.include?(":") - route = route.gsub(/:(\w+)/) do |match| - db_ids[$1.to_sym].sample(random: rng) - end - end - visiting_envs << Rack::MockRequest::env_for("https://localhost#{route}", method: group[:method]) - end -end - +# Track ActiveRecord time if ENV['TRACK_AR_TIME'] ar_total_duration = 0.0 process_start_t = Time.now @@ -76,36 +32,13 @@ end end -# Let's be able to log in as one specific user... -# With the srand seed given in lib/tasks/fake_data.rake, we use the fake data for one of the users - -# Lobsters doesn't love setting a logged-in cookie if you previously had no cookie. So first GET /login -# and set the cookie from there. -login_get_env = Rack::MockRequest::env_for("https://localhost/login") -login_get_resp = app.call(login_get_env) -auth_token_line = login_get_resp[2].join.lines.detect { |line| line.include?("authenticity_token") && line.include?("value") } -auth_token = auth_token_line.scan(/value="([^"]+)"/)[0][0] -resp_cookie_header = login_get_resp[1]["Set-Cookie"] #+ "; tag_filters=NOCACHE" # turn off the file cache - -# Let's log in as one specific user... 
-# With the srand seed given in lib/tasks/fake_data.rake, we use the fake data for one of the users -login_post_env = Rack::MockRequest::env_for("https://localhost/login", method: "POST", params: { email: "wiegand.michell@mertz-vonrueden.test", password: "ji3W36xR", authenticity_token: auth_token }) -login_post_env["HTTP_COOKIE"] = resp_cookie_header -login_post_resp = app.call(login_post_env) -raise("Can't log in as fake user wiegand.michell: #{login_post_resp.inspect}") unless login_post_resp[0] == 302 -resp_cookie_header = login_post_resp[1]["Set-Cookie"] #+ "; tag_filters=NOCACHE" # turn off the file cache - run_benchmark(10) do - visiting_envs.each_with_index do |env, idx| + generator.routes.each_with_index do |env, idx| path = env["PATH_INFO"] # app.call mutates the path - env["HTTP_COOKIE"] = resp_cookie_header # TODO: disable file cache w/ cookie? - response_array = app.call(env) + response_array = generator.visit(env) # Track HTTP cookies as we go along unless response_array.first == 200 puts response_array.inspect - raise "HTTP status is #{response_array.first} instead of 200 for req #{idx}/#{visiting_envs.size}, #{path.inspect}. Is the benchmark app properly set up? See README.md." - end - if response_array[1]["Set-Cookie"] - resp_cookie_header = response_array[1]["Set-Cookie"] + raise "HTTP status is #{response_array.first} instead of 200 for req #{idx}/#{generator.routes.size}, #{path.inspect}. Is the benchmark app properly set up? See README.md." end end end diff --git a/benchmarks/lobsters/route_generator.rb b/benchmarks/lobsters/route_generator.rb new file mode 100644 index 00000000..5ffc9715 --- /dev/null +++ b/benchmarks/lobsters/route_generator.rb @@ -0,0 +1,121 @@ +# Generate a set of routes for Lobsters + +class RouteGenerator + # Take a variety of routes and randomise order, distribution and specific data items (comments, users.) 
+ ROUTE_GROUPS = [ + { num: 50, method: :GET, routes: ["/u"] }, # Users tree, showing order of invitation - lots of view logic + { num: 50, method: :GET, routes: ["/active", "/newest", "/recent", "/hottest"] }, # Views of the stories by attributes + { num: 25, method: :GET, routes: ["/rss", "/privacy", "/about", "/settings"] }, # Less-common and less-interesting routes for variation + { num: 25, method: :GET, routes: ["/top?length=1d", "/top?length=1w", "/top?length=1y"] }, # Top stories by time + { num: 50, method: :GET, routes: ["/hidden", "/saved", "/upvoted/stories"] }, # These all require being logged in + + # Turned off flag_warning check for /threads -- too hard to port to SQLite + { num: 50, routes: ["/comments", "/upvoted/comments", "/threads", "/comments/:comment_id/reply"] }, + { num: 25, routes: ["/threads/:username", "/u/:username"] }, + + { num: 50, routes: ["/replies", "/replies/comments", "/replies/stories", "/replies/unread"] }, # replies#stories, replies#unread, replies#comments + + #{ num: 25, routes: ["/s/:story_id"] }, # need to check more into how stories work - e.g. story#show doesn't seem side-effect-free + + # /categories: - admin-only + #{ num: 50, routes: ["/stats"] }, # Stats gets a 500, needs more MySQL->SQLite porting + # Shouldn't add /404, because that returns status 404, not 200 + # No messages added during fake-data task, so skip messages controller + # The moderators controller isn't high-traffic, plus it has various MySQL time code that needs porting - skip it? + + # POSTs are harder here. Comments seem to exist mostly in the context of stories, which changes their behaviour. + # We'd need to do roughly what the Faker does, where we create a story and do various interactions in the context + # of it. For now, skip it. 
+ #{ num: 10, method: :POST, routes: ["/comments/:comment_id/upvote"], post_opts: {} }, + ] + + def initialize(app, rng: nil) + @app = app + + @auth_token = nil + @resp_cookie_header = nil + @logged_in = false + + @rng = rng || Random.new(0x1be52551fc152997) + end + + def routes + @routes ||= generate_routes + end + + def visit(route) + route["HTTP_COOKIE"] = @resp_cookie_header + response_array = @app.call(route) + if response_array[1]["Set-Cookie"] + @resp_cookie_header = response_array[1]["Set-Cookie"] + end + response_array + end + + ### Helpers to Query Rails Data + + def auth_token + return @auth_token if @auth_token + + # We need to fetch the login page to get a CSRF token. We'll use the same token for all requests. + # We also need to get the CSRF token before generating the env hashes for later requests. + + # First GET /login and set the cookie from there. CSRF token from a single session should work throughout that session. + login_get_env = Rack::MockRequest::env_for("https://localhost/login") + login_get_resp = @app.call(login_get_env) + auth_token_line = login_get_resp[2].join.lines.detect { |line| line.include?("authenticity_token") && line.include?("value") } + @auth_token = auth_token_line.scan(/value="([^"]+)"/)[0][0] + @resp_cookie_header = login_get_resp[1]["Set-Cookie"] #+ "; tag_filters=NOCACHE" # turn off the file cache + + @auth_token + end + + def do_login + return if @logged_in + + auth_token # make sure we have the auth token + + # Let's log in as one specific user... 
+ # With the srand seed given in lib/tasks/fake_data.rake, we use the fake data for one of the users + login_post_env = Rack::MockRequest::env_for("https://localhost/login", method: "POST", params: { email: "wiegand.michell@mertz-vonrueden.test", password: "ji3W36xR", authenticity_token: @auth_token }) + login_post_env["HTTP_COOKIE"] = @resp_cookie_header + login_post_resp = @app.call(login_post_env) + raise("Can't log in as fake user wiegand.michell: #{login_post_resp.inspect}") unless login_post_resp[0] == 302 + @resp_cookie_header = login_post_resp[1]["Set-Cookie"] #+ "; tag_filters=NOCACHE" # turn off the file cache + @logged_in = true + end + + private + + def generate_routes + db_ids = { + comment_id: Comment.all.pluck(:short_id), + username: User.all.pluck(:username), + } + + # We want to randomise the order, but we need to make sure a user, comment, etc. exists when it's referenced. + # So we start by creating a set of references to "this group is at this point in the order" and then + # fill them in, keeping track of what data items exist as we go along. + + group_list = ROUTE_GROUPS.flat_map { |group| (1..group[:num]).map { group } } # group[:num] references to each group + group_list.shuffle!(random: @rng) + route_group_envs = [] + group_list.each do |group| + route = group[:routes].sample(random: @rng) + if route.include?(":") + route = route.gsub(/:(\w+)/) do |match| + db_ids[$1.to_sym].sample(random: @rng) + end + end + route_group_envs << Rack::MockRequest::env_for("https://localhost#{route}", method: group[:method]) + + # Do we need to mess with our list of data items? + # If we figure out comment upvote/flag/delete etc. we'll need some of this. + # For now, ignore. + #if group[:method] != :GET && group[:post_opts] + #end + end + + route_group_envs + end +end