Skip to content

Commit

Permalink
Refactored billable invoice
Browse files Browse the repository at this point in the history
  • Loading branch information
QuanMPhm committed Jun 11, 2024
1 parent 537ab88 commit cfb7be3
Show file tree
Hide file tree
Showing 5 changed files with 252 additions and 207 deletions.
22 changes: 22 additions & 0 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from dataclasses import dataclass

import process_report.invoices.invoice as invoice
import process_report.util as util


@dataclass
class BillableInvoice(invoice.Invoice):
    """Invoice whose data is filtered to billable rows before credits are applied.

    Fields added on top of those inherited from ``invoice.Invoice``:

    * ``nonbillable_pis`` / ``nonbillable_projects`` -- forwarded to
      ``util.remove_nonbillables`` to filter ``self.data``.
    * ``old_pi_filepath`` -- path given to ``util.load_old_pis`` and
      ``util.dump_old_pis``.
    """

    nonbillable_pis: list[str]
    nonbillable_projects: list[str]
    old_pi_filepath: str

    def _prepare(self):
        """Drop non-billable rows, then pass the result through PI-name validation."""
        exclusions = (self.nonbillable_pis, self.nonbillable_projects)
        self.data = util.remove_nonbillables(self.data, *exclusions)
        self.data = util.validate_pi_names(self.data)

    def _process(self):
        """Run ``util.apply_credits_new_pi`` and persist the PI table it returns."""
        pi_records = util.load_old_pis(self.old_pi_filepath)
        credited_data, pi_records = util.apply_credits_new_pi(self.data, pi_records)
        self.data = credited_data
        # Write the updated PI table back to the same path it was loaded from.
        util.dump_old_pis(self.old_pi_filepath, pi_records)
8 changes: 8 additions & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
import process_report.util as util


### PI file field names
# NOTE(review): these look like the column headers of the "old PI" CSV; the
# same constant names are used that way in process_report.py -- confirm that
# the util helpers read them from this module.
PI_PI_FIELD = "PI"
PI_FIRST_MONTH = "First Invoice Month"
PI_INITIAL_CREDITS = "Initial Credits"
PI_1ST_USED = "1st Month Used"
PI_2ND_USED = "2nd Month Used"
###

### Invoice field names
INVOICE_DATE_FIELD = "Invoice Month"
PROJECT_FIELD = "Project - Allocation"
Expand Down
185 changes: 23 additions & 162 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
import boto3
import pyarrow

from process_report.invoices import lenovo_invoice, nonbillable_invoice
from process_report.invoices import (
lenovo_invoice,
nonbillable_invoice,
billable_invoice,
)


### PI file field names
Expand Down Expand Up @@ -62,33 +66,6 @@ def load_institute_map() -> dict:
return institute_map


def load_old_pis(old_pi_file) -> pandas.DataFrame:
    """Read the old PI CSV file into a DataFrame.

    The initial-credit and the two credits-used columns are parsed as Arrow
    ``decimal128(21, 2)`` values. If the file does not exist the program is
    terminated through ``sys.exit``.
    """
    decimal_dtype = pandas.ArrowDtype(pyarrow.decimal128(21, 2))
    credit_columns = (PI_INITIAL_CREDITS, PI_1ST_USED, PI_2ND_USED)

    try:
        return pandas.read_csv(
            old_pi_file,
            dtype={column: decimal_dtype for column in credit_columns},
        )
    except FileNotFoundError:
        sys.exit("Applying credit 0002 failed. Old PI file does not exist")


def dump_old_pis(old_pi_file, old_pi_df: pandas.DataFrame):
    """Write the PI table to ``old_pi_file`` as CSV, without the index.

    The initial-credit and the two credits-used columns are cast to Arrow
    ``decimal128(21, 2)`` before writing.
    """
    decimal_dtype = pandas.ArrowDtype(pyarrow.decimal128(21, 2))
    credit_dtypes = dict.fromkeys(
        (PI_INITIAL_CREDITS, PI_1ST_USED, PI_2ND_USED), decimal_dtype
    )
    old_pi_df.astype(credit_dtypes).to_csv(old_pi_file, index=False)


def load_alias(alias_file):
alias_dict = dict()

Expand All @@ -104,31 +81,6 @@ def load_alias(alias_file):
return alias_dict


def get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
    """Return the number of months between the invoice month and the PI's first invoice month.

    Args:
        old_pi_df: PI table; rows are matched on the PI column.
        pi: PI to look up.
        invoice_month: current invoice month as a "%Y-%m" string.

    Returns:
        0 when the PI has no row in ``old_pi_df`` (i.e. a new PI), otherwise
        the non-negative month difference computed by ``get_month_diff``.

    Exits the program (``sys.exit``) if the PI's age is negative, which
    suggests a faulty invoice or a program bug.
    """
    matching_months = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi, PI_FIRST_MONTH]
    if matching_months.empty:
        return 0

    # Work with the scalar month string from here on; interpolating the
    # Series itself would put its whole repr (index, dtype) in the message.
    first_invoice_month = matching_months.iat[0]
    month_diff = get_month_diff(invoice_month, first_invoice_month)
    if month_diff < 0:
        sys.exit(
            f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
        )
    return month_diff


def get_month_diff(month_1, month_2):
    """Return the signed number of months from month_2 to month_1.

    Both arguments are "%Y-%m" strings. The result is positive when month_1
    is ahead in time of month_2, zero when they are equal, negative otherwise.
    """
    first, second = (
        datetime.datetime.strptime(month, "%Y-%m") for month in (month_1, month_2)
    )
    year_gap = first.year - second.year
    return year_gap * 12 + (first.month - second.month)


def get_invoice_bucket():
try:
s3_resource = boto3.resource(
Expand Down Expand Up @@ -297,22 +249,29 @@ def main():
bucket = get_invoice_bucket()
invoice.export_s3(bucket)

billable_projects = remove_non_billables(merged_dataframe, pi, projects)
billable_projects = validate_pi_names(billable_projects)

if args.upload_to_s3:
backup_to_s3_old_pi_file(old_pi_file)
credited_projects = apply_credits_new_pi(billable_projects, old_pi_file)

export_billables(credited_projects, args.output_file)
export_pi_billables(credited_projects, args.output_folder, invoice_month)
export_BU_only(billable_projects, args.BU_invoice_file, args.BU_subsidy_amount)
export_HU_BU(credited_projects, args.HU_BU_invoice_file)
billable_inv = billable_invoice.BillableInvoice(
name=args.nonbillable_file,
invoice_month=invoice_month,
data=merged_dataframe.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
old_pi_filepath=old_pi_file,
)
billable_inv.process()
billable_inv.export()
if args.upload_to_s3:
bucket = get_invoice_bucket()
billable_inv.export_s3(bucket)

export_pi_billables(billable_inv.data, args.output_folder, invoice_month)
export_BU_only(billable_inv.data, args.BU_invoice_file, args.BU_subsidy_amount)
export_HU_BU(billable_inv.data, args.HU_BU_invoice_file)

if args.upload_to_s3:
invoice_list = [
args.output_file,
]
invoice_list = list()

for pi_invoice in os.listdir(args.output_folder):
invoice_list.append(os.path.join(args.output_folder, pi_invoice))
Expand Down Expand Up @@ -377,23 +336,6 @@ def timed_projects(timed_projects_file, invoice_date):
return dataframe[mask]["Project"].to_list()


def remove_non_billables(dataframe, pi, projects):
    """Return only the rows whose PI is not in `pi` and whose project is not in `projects`."""
    pi_is_nonbillable = dataframe[PI_FIELD].isin(pi)
    project_is_nonbillable = dataframe[PROJECT_FIELD].isin(projects)
    # A row is dropped if either its PI or its project is listed.
    return dataframe[~(pi_is_nonbillable | project_is_nonbillable)]


def validate_pi_names(dataframe):
    """Print a warning for every row with a missing PI and return the rows that have one."""
    missing_pi = pandas.isna(dataframe[PI_FIELD])
    for _, project_row in dataframe[missing_pi].iterrows():
        print(
            f"Warning: Billable project {project_row[PROJECT_FIELD]} has empty PI field"
        )
    return dataframe[~missing_pi]


def validate_pi_aliases(dataframe: pandas.DataFrame, alias_dict: dict):
for pi, pi_aliases in alias_dict.items():
dataframe.loc[dataframe[PI_FIELD].isin(pi_aliases), PI_FIELD] = pi
Expand All @@ -408,87 +350,6 @@ def fetch_s3_alias_file():
return local_name


def apply_credits_new_pi(dataframe, old_pi_file):
    """Apply the New PI credit (code "0002") to each PI's projects.

    The credit, credit-code and balance columns of `dataframe` are (re)set and
    filled in place, and the same dataframe object is returned. The PI table
    is loaded from `old_pi_file`, updated with newly seen PIs and with the
    credits used this month, and written back to `old_pi_file`.

    The program exits (via `load_old_pis` / `get_pi_age`) if the PI file is
    missing or a PI's first invoice month is later than the invoice month.
    """
    new_pi_credit_code = "0002"
    INITIAL_CREDIT_AMOUNT = 1000
    # Rows with these SU types never receive the credit.
    EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]

    # Reset the output columns on the caller's dataframe.
    dataframe[CREDIT_FIELD] = None
    dataframe[CREDIT_CODE_FIELD] = None
    dataframe[BALANCE_FIELD] = Decimal(0)

    old_pi_df = load_old_pis(old_pi_file)

    current_pi_set = set(dataframe[PI_FIELD])
    # The invoice month is taken from the first row only.
    invoice_month = dataframe[INVOICE_DATE_FIELD].iat[0]
    # If some PI already recorded for this invoice month has a non-NA initial
    # credit, reuse the first such value; otherwise use the default amount.
    invoice_pis = old_pi_df[old_pi_df[PI_FIRST_MONTH] == invoice_month]
    if invoice_pis[PI_INITIAL_CREDITS].empty or pandas.isna(
        new_pi_credit_amount := invoice_pis[PI_INITIAL_CREDITS].iat[0]
    ):
        new_pi_credit_amount = INITIAL_CREDIT_AMOUNT

    print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")

    for pi in current_pi_set:
        pi_projects = dataframe[dataframe[PI_FIELD] == pi]
        pi_age = get_pi_age(old_pi_df, pi, invoice_month)
        pi_old_pi_entry = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi].squeeze()

        if pi_age > 1:
            # No credit for PIs older than one month: balance equals cost.
            for i, row in pi_projects.iterrows():
                dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD]
        else:
            if pi_age == 0:
                if len(pi_old_pi_entry) == 0:
                    # PI not in the table yet: prepend a fresh entry.
                    # NOTE(review): assumes old_pi_df has exactly these five
                    # columns in this order -- confirm against the PI file.
                    pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0]
                    old_pi_df = pandas.concat(
                        [
                            pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
                            old_pi_df,
                        ],
                        ignore_index=True,
                    )
                    pi_old_pi_entry = old_pi_df.loc[
                        old_pi_df[PI_PI_FIELD] == pi
                    ].squeeze()

                remaining_credit = new_pi_credit_amount
                credit_used_field = PI_1ST_USED
            elif pi_age == 1:
                # Second month: whatever was not used in the first month.
                remaining_credit = (
                    pi_old_pi_entry[PI_INITIAL_CREDITS] - pi_old_pi_entry[PI_1ST_USED]
                )
                credit_used_field = PI_2ND_USED

            initial_credit = remaining_credit
            # Spend the credit row by row, in the dataframe's row order.
            for i, row in pi_projects.iterrows():
                if remaining_credit == 0 or row[SU_TYPE_FIELD] in EXCLUDE_SU_TYPES:
                    dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD]
                else:
                    project_cost = row[COST_FIELD]
                    applied_credit = min(project_cost, remaining_credit)

                    dataframe.at[i, CREDIT_FIELD] = applied_credit
                    dataframe.at[i, CREDIT_CODE_FIELD] = new_pi_credit_code
                    dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD] - applied_credit
                    remaining_credit -= applied_credit

            credits_used = initial_credit - remaining_credit
            # Warn when a previously stored, non-zero usage is about to change.
            if (pi_old_pi_entry[credit_used_field] != 0) and (
                credits_used != pi_old_pi_entry[credit_used_field]
            ):
                print(
                    f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
                )
            # The stored usage is always overwritten, warning or not.
            old_pi_df.loc[
                old_pi_df[PI_PI_FIELD] == pi, credit_used_field
            ] = credits_used

    dump_old_pis(old_pi_file, old_pi_df)

    return dataframe


def fetch_s3_old_pi_file():
local_name = "PI.csv"
invoice_bucket = get_invoice_bucket()
Expand Down
Loading

0 comments on commit cfb7be3

Please sign in to comment.