Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE]: ALB as Trigger #1

Open
mountainash opened this issue Sep 9, 2024 · 1 comment
Open

[FEATURE]: ALB as Trigger #1

mountainash opened this issue Sep 9, 2024 · 1 comment

Comments

@mountainash
Copy link

Thank you very much for your script. It has come in very handy as a proxy for a client whose DNS settings, which point to our servers, we couldn't easily change - so we used it to forward their requests to our newer host.

These changes are to the format of the event received when an AWS Load Balancer is used as the trigger. If you diff it with the current HEAD, you'll see some subtle changes (sorry I didn't go to the effort of forking this repo).

Other additions:

  • added robots noindex, nofollow to error returns so they aren't indexed by search engines
  • typo fix on the word "funtion"

It also runs on the latest Python runtime available on Lambda (v3.12).

#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = "Ircama"
__copyright__ = "Copyright 2021-2023, Ircama"
__license__ = "CC BY-NC-SA 4.0"
__version__ = "1.0.2"
__maintainer__ = "Ircama"

import base64
import html
import os
import pprint
from urllib.parse import unquote_plus, urlencode

import urllib3

# Outbound requests are made without certificate verification (see the
# PoolManager created in proxy_handler), so silence urllib3's
# InsecureRequestWarning, i.e. the "Not Secure" warning when using HTTPS:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Heading text used on the generated error pages; can be overridden through
# the GENERAL_ERROR environment variable (an empty value falls back to the
# default).
GENERAL_ERROR = os.environ.get('GENERAL_ERROR') or "AWS Lambda Error"

# Largest remote response body, in bytes, that proxy_handler relays; a bigger
# body is answered with a 413 error page. Can be overridden through the
# PAYLOAD_QUOTA environment variable.
PAYLOAD_QUOTA = int(os.environ.get('PAYLOAD_QUOTA') or 5000000)
# https://docs.aws.amazon.com/lambda/latest/dg/gettingstarted-limits.html

def _html_response(status_code, body):
    """Build a Lambda proxy response carrying an HTML page hidden from crawlers."""
    return {
        "statusCode": status_code,
        "body": body,
        "headers": {
            'Content-Type': 'text/html',
            'X-Robots-Tag': 'noindex, nofollow',
        }
    }


def _request_target(event):
    """Return ``(http_method, path, query_string)`` for the incoming event.

    Supports both event layouts the function can be triggered with:

    * ALB / REST-style events: ``httpMethod``, ``path`` and
      ``queryStringParameters``;
    * HTTP API (payload format 2.0) events: ``requestContext.http.method``,
      ``rawPath`` and ``rawQueryString``.

    The query string is returned without the leading ``?`` and is empty when
    the request has none.
    """
    request_context = event.get('requestContext') or {}
    http_method = (
        event.get('httpMethod')
        or (request_context.get('http') or {}).get('method')
        or 'GET'
    )
    path = event.get('path') or event.get('rawPath') or ''

    # HTTP API 2.0 provides the query string verbatim; prefer it because the
    # parsed parameters lose repeated keys.
    query = event.get('rawQueryString') or ''
    params = event.get('queryStringParameters')
    if not query and params:
        if 'elb' in request_context:
            # An Application Load Balancer passes query parameters still
            # percent-encoded; decode them first so that urlencode() does not
            # encode them a second time.
            params = {
                unquote_plus(key): unquote_plus(value)
                for key, value in params.items()
            }
        query = urlencode(params)
    return http_method, path, query


def _dump_page(event, context):
    """Render the HTML page showing the received event and the Lambda context."""
    # https://docs.aws.amazon.com/lambda/latest/dg/python-context.html
    return (
        """<!DOCTYPE html><html><head><title>Request dump</title></head>
<style>
pre {
    border: 2px solid grey;
    border-left: 6px solid #8f9090;
    border-radius: 8px;
    padding-left: 14px;
    padding-bottom: 14px;
    padding-right: 14px;
    width: -moz-fit-content;
    width: fit-content;
    margin: auto;
    line-height: 15px;
    background-image: linear-gradient(180deg, #f5f5f5 50%, #fff 50%);
    background-size: 100% 30px;
    background-position: 0 14px;
    box-shadow: 5px 5px 10px rgb(0 0 0 / 30%);
    -webkit-box-shadow: 5px 5px 10px rgba(0,0,0,0.3);
}
</style><body><hr /><div><pre>""" +
        # The event is entirely client-controlled: escape it so that request
        # data cannot inject markup into the page.
        html.escape(pprint.pformat(event, indent=4)) + "\n<hr />" +
        "Lambda function name: " +
            repr(context.function_name) + "\n" +
        "Lambda function version: " +
            repr(context.function_version) + "\n" +
        "Lambda function ARN: " +
            repr(context.invoked_function_arn) + "\n" +
        "CloudWatch log stream name: " +
            repr(context.log_stream_name) + "\n" +
        "CloudWatch log group name: " +
            repr(context.log_group_name) + "\n" +
        "Lambda Request ID: " +
            repr(context.aws_request_id) + "\n" +
        "Lambda function memory limits in MB: " +
            repr(context.memory_limit_in_mb) + "\n" +
        "Lambda time remaining in MS: " +
            repr(context.get_remaining_time_in_millis()) + "\n" +
        "\n<hr />" +
        "</pre></body></html>"
    )


def proxy_handler(event, context):
    """Relay the incoming HTTP request to ``REMOTE_URL`` and return its answer.

    The event may come from an Application Load Balancer / REST-style
    integration or from an HTTP API (payload format 2.0) integration.

    Environment variables read:
        REMOTE_URL: base URL of the target system (required).
        FILTERED_PATH: ``|``-separated list of paths that are refused.
        FILTERED_URL_MSG: message shown for a refused path.
        REQUEST_TIMEOUT: timeout in seconds for the remote call (default 11).

    Query parameters with a special meaning (both are also forwarded):
        trace_connection: when set, print request/response details to the log.
        dump_request: when set, return a dump of the event and of the Lambda
            context instead of the remote answer.

    Args:
        event: the Lambda invocation event (a dict).
        context: the Lambda context object; only read for ``dump_request``.

    Returns:
        A Lambda proxy response dict. Remote answers are returned
        base64-encoded with the remote status code and headers; failures are
        returned as HTML error pages (500, or 413 when the remote body
        exceeds ``PAYLOAD_QUOTA``).
    """
    remote_url = os.environ.get('REMOTE_URL')
    if remote_url is None:
        return _html_response(500, GenerateErrorPage(
            "Missing REMOTE_URL environment variable",
            GENERAL_ERROR + ": missing REMOTE_URL environment variable",
            'AWS Lambda function not correctly configured'))

    http_method, path, query = _request_target(event)
    url = remote_url + path
    if query:
        url += "?" + query

    filtered_paths = os.environ.get('FILTERED_PATH')
    if (filtered_paths is not None and path and
            path in filtered_paths.split("|")):
        return _html_response(500, GenerateErrorPage(
            url,
            GENERAL_ERROR,
            os.environ.get('FILTERED_URL_MSG') or 'Filtered URL.'))

    query_params = event.get('queryStringParameters') or {}
    # &trace_connection=y to print log data
    trace_connection = bool(query_params.get("trace_connection"))
    # &dump_request=y to dump the request
    dump_request = bool(query_params.get("dump_request"))

    # Work on a copy so that the event shown by dump_request stays untouched.
    headers = dict(event.get('headers') or {})

    # Take the Host header out of the forwarded headers (whatever its case),
    # so that the host name of this proxy is not sent to the remote system.
    host = ''
    for header_name in list(headers):
        if header_name.lower() == 'host':
            host = headers.pop(header_name) or ''

    # Lambda function URLs look like <id>.lambda-url.<region>.on.aws
    domain = host.split(".")
    is_lambda_url = (len(domain) == 5 and
                     domain[1] == 'lambda-url' and
                     domain[3] == 'on' and
                     domain[4] == 'aws')

    # HTTP API 2.0 events carry the request cookies in a separate "cookies"
    # list; ALB events keep them in the "cookie" header, which is forwarded
    # with the other headers.
    cookies = []  # set this only if cookies are not referred to a lambda url
    if not is_lambda_url and event.get('cookies'):
        cookies = event['cookies']
        if isinstance(cookies, str):
            cookies = [cookies]
    if cookies:
        headers['Cookie'] = '; '.join(cookies)  # not set with lambda url

    body = event.get('body') or ''
    if event.get('isBase64Encoded'):
        body = base64.b64decode(body)

    if trace_connection:
        print("trace_connection - remote url =", url)
        print("trace_connection - local http_method =", http_method)
        print("trace_connection - headers =", headers)
        print("trace_connection - cookies =", cookies)
        print("trace_connection - body =", body)

    retries = urllib3.util.Retry(connect=0, read=0, redirect=0)
    # NOTE(review): cert_reqs='CERT_NONE' disables TLS certificate
    # verification of the remote system; kept as is because targets may use
    # self-signed certificates - confirm this is intended.
    http = urllib3.PoolManager(
        cert_reqs='CERT_NONE',
        timeout=float(os.environ.get('REQUEST_TIMEOUT') or 11.0),
        retries=retries)

    try:
        resp = http.request(
            method=http_method,
            url=url,
            headers=headers,
            body=body,
            redirect=False
        )
        resp_cookies = resp.headers.getlist('Set-Cookie')
        encoded_body = base64.b64encode(resp.data)
        if trace_connection:
            print("trace_connection - statusCode returned from remote =", resp.status)
            print("trace_connection - resp_cookies =", resp_cookies)
            print("trace_connection - resp.headers =", resp.headers)
            print("trace_connection - size of received data =", len(resp.data))
            print("trace_connection - size of encoded data =", len(encoded_body))
        # https://docs.aws.amazon.com/apigateway/latest/developerguide/http-api-develop-integrations-lambda.html
        if len(resp.data) > PAYLOAD_QUOTA:
            return _html_response(413, GenerateErrorPage(
                url,
                GENERAL_ERROR + ' (Payload Too Large)',
                'Too much data to return'
                ' from the remote web site over AWS Lambda'))
        # Here substitutions can be optionally applied to resp.data
        response = {
            "cookies": resp_cookies,
            "isBase64Encoded": True,
            "statusCode": resp.status,
            # Decode to str: the response has to be serialised to JSON and
            # base64 output is pure ASCII.
            "body": encoded_body.decode('ascii'),
            "headers": {name: resp.headers[name] for name in resp.headers}
        }
    except urllib3.exceptions.MaxRetryError:
        if trace_connection:
            print('trace_connection - Remote server down.')
        response = _html_response(500, GenerateErrorPage(
            url,
            GENERAL_ERROR,
            'Remote server down.'))
    except urllib3.exceptions.NewConnectionError:
        if trace_connection:
            print('trace_connection - Connection failed.')
        response = _html_response(500, GenerateErrorPage(
            url,
            GENERAL_ERROR,
            'Connection failed'))
    except Exception as e:
        # Last-resort boundary: always answer with an error page rather than
        # letting the invocation fail.
        if trace_connection:
            print('trace_connection - Connection error:', repr(e))
        response = _html_response(500, GenerateErrorPage(
            url,
            GENERAL_ERROR,
            'Connection error: ' + repr(e)))

    if not dump_request:
        return response

    return _html_response(200, _dump_page(event, context))


def GenerateErrorPage(url, error, description):
    """Return the HTML error page shown when a request cannot be proxied.

    Every value inserted into the page is HTML-escaped: ``url`` contains the
    path and query string sent by the client, so inserting it verbatim would
    let a crafted request inject markup or script into the page.

    Args:
        url: address of the same page on the target system, used as the
            link target at the bottom of the page.
        error: short error heading.
        description: longer explanation, displayed in red.

    Returns:
        The complete HTML document as a string. The page title comes from
        the GENERAL_ERROR environment variable and the main heading from
        APP_NAME, each with a built-in default.
    """
    title = html.escape(os.environ.get('GENERAL_ERROR') or "AWS Lambda Error")
    app_name = html.escape(os.environ.get('APP_NAME') or "Sample Application")
    # quote=True (the default) also escapes quotes, which matters for the
    # single-quoted href attribute below.
    safe_url = html.escape(url)
    safe_error = html.escape(error)
    safe_description = html.escape(description)
    return """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
    <title>""" + title + """</title>
    </head>
    <body>
        <div class="jumbotron">
            <h1 class="display-4">""" + app_name + """</h1>
            <p class="lead"><h4><pre><b>""" + safe_error + """</b></pre></p></h4>
        </div>
        <div class="container-fluid">
        <h3><a style="color:red">""" + safe_description + """</a></h3>
        </div>
        <br />
        <br />
        <div class="container-fluid">
        <h3><a href='""" + safe_url + """'>Click here to access the same page on the target system</a></h3>
        </div>
    </body>
</html> 
"""
Ircama added a commit that referenced this issue Sep 9, 2024
Ircama added a commit that referenced this issue Sep 9, 2024
@Ircama
Copy link
Owner

Ircama commented Sep 9, 2024

Thanks for this note. I updated the code for the parts related to the typo and to the X-Robots-Tag. As for the AWS load balancer integration, please revise the code to keep compatibility with the HTTP API integration.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants