Skip to content

Commit

Permalink
server: Store SHA-256 base64 & hex for uploaded files
Browse files Browse the repository at this point in the history
  • Loading branch information
birkjernstrom committed May 15, 2024
1 parent 295dd38 commit 0d4ee3a
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 53 deletions.
72 changes: 41 additions & 31 deletions clients/apps/web/src/components/Benefit/Files/Dropzone.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
'use client'

import { api } from '@/utils/api'
import { FileRead, Organization } from '@polar-sh/sdk'

import { api } from '@/utils/api'
import { useRef } from 'react'
// Credit: https://codepen.io/dulldrums/pen/RqVrRr
/**
 * Encodes the bytes of a buffer as a lowercase hexadecimal string.
 *
 * Every byte becomes exactly two hex digits, in buffer order, so the result
 * is always `2 * buffer.byteLength` characters long. Works for buffers of any
 * length, including empty ones and lengths that are not a multiple of four.
 *
 * @param buffer - Raw bytes to encode (e.g. a SHA-256 digest).
 * @returns The hex representation of `buffer`; `''` for an empty buffer.
 */
const hex = (buffer: ArrayBuffer): string => {
  // Go byte by byte instead of reading 32-bit words: DataView.getUint32 throws
  // a RangeError as soon as fewer than four bytes remain.
  const hexCodes = Array.from(new Uint8Array(buffer), (byte) =>
    // toString(16) drops leading zeros, so pad each byte back to two digits
    byte.toString(16).padStart(2, '0'),
  )

  // Join all the hex strings into one
  return hexCodes.join('')
}

/**
 * Computes the SHA-256 digest of the given bytes using the Web Crypto API.
 *
 * @param file - Raw file contents to hash.
 * @returns A promise resolving to the digest bytes as an ArrayBuffer.
 */
const getSha256Hash = async (file: ArrayBuffer) =>
  crypto.subtle.digest('SHA-256', file)

const Dropzone = ({
organization,
Expand All @@ -12,36 +34,28 @@ const Dropzone = ({
organization: Organization
onUploaded: (file: FileRead) => void
}) => {
const inputFileRef = useRef<HTMLInputElement>(null)

const handleUpload = async (image: string) => {
if (!inputFileRef.current?.files) {
throw new Error('No file selected')
}
const handleUpload = async (file: File, buffer: ArrayBuffer) => {
const sha256hash = await getSha256Hash(buffer)
const sha256hex = hex(sha256hash)
const base64hash = btoa(String.fromCharCode(...new Uint8Array(sha256hash)))

if (image === undefined) {
throw new Error('No image')
}

const file = inputFileRef.current.files[0]
const params = {
organization_id: organization.id,
name: file.name,
size: file.size,
mime_type: file.type,
sha256: {
base64: base64hash,
hex: sha256hex,
},
version: null,
}
const response = await api.files.createFile({
fileCreate: params,
})

const binary = atob(image.split(',')[1])
const array = []
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i))
}
let blob = new Blob([buffer], { type: file.type })

let blob = new Blob([new Uint8Array(array)], { type: file.type })
const result = await fetch(response.url, {
method: 'PUT',
headers: response.headers,
Expand All @@ -63,24 +77,20 @@ const Dropzone = ({
<>
<input
name="file"
ref={inputFileRef}
type="file"
required
multiple={true}
tabIndex={-1}
onChange={async (e) => {
if (e.target.files && e.target.files[0]) {
const files = e.target.files
Array.from(files).forEach((file) => {
const reader = new FileReader()
reader.onload = async (readerLoad) => {
if (
readerLoad.target &&
typeof readerLoad.target.result === 'string'
) {
console.log('imageSet', readerLoad.target.result)
await handleUpload(readerLoad.target.result)
}
reader.onload = async () => {
const result = reader.result
await handleUpload(file, result)
}
reader.readAsDataURL(e.target.files[0])
}
reader.readAsArrayBuffer(file)
})
}}
/>
</>
Expand Down
37 changes: 37 additions & 0 deletions clients/packages/sdk/src/client/models/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4674,6 +4674,12 @@ export interface FileCreate {
* @memberof FileCreate
*/
mime_type: string;
/**
*
* @type {SHA256Checksums}
* @memberof FileCreate
*/
sha256: SHA256Checksums | null;
/**
*
* @type {string}
Expand Down Expand Up @@ -4729,6 +4735,12 @@ export interface FilePresignedRead {
* @memberof FilePresignedRead
*/
size: number;
/**
*
* @type {SHA256Checksums}
* @memberof FilePresignedRead
*/
sha256: SHA256Checksums | null;
/**
*
* @type {string}
Expand Down Expand Up @@ -4820,6 +4832,12 @@ export interface FileRead {
* @memberof FileRead
*/
size: number;
/**
*
* @type {SHA256Checksums}
* @memberof FileRead
*/
sha256: SHA256Checksums | null;
/**
*
* @type {string}
Expand Down Expand Up @@ -9236,6 +9254,25 @@ export interface RewardsSummaryReceiver {
*/
avatar_url?: string;
}
/**
 * SHA-256 checksums of a file, carried in two encodings of the same digest.
 * Exposed as the `sha256` member of FileCreate, FileRead and FilePresignedRead.
 * @export
 * @interface SHA256Checksums
 */
export interface SHA256Checksums {
/**
 * SHA-256 digest encoded as base64. The type permits null.
 * @type {string}
 * @memberof SHA256Checksums
 */
base64: string | null;
/**
 * SHA-256 digest encoded as a hexadecimal string. The type permits null.
 * @type {string}
 * @memberof SHA256Checksums
 */
hex: string | null;
}

/**
*
Expand Down
33 changes: 33 additions & 0 deletions server/migrations/versions/2024-05-15-1340_file_checksum_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""file checksum update
Revision ID: 12e1632fb710
Revises: bab063af0113
Create Date: 2024-05-15 13:40:05.732079
"""

import sqlalchemy as sa
from alembic import op

# Polar Custom Imports
from polar.kit.extensions.sqlalchemy import PostgresUUID

# revision identifiers, used by Alembic.
revision = "12e1632fb710"
down_revision = "bab063af0113"
branch_labels: tuple[str] | None = None
depends_on: tuple[str] | None = None


def upgrade() -> None:
    """Rename the stored checksum column and add its hex counterpart.

    ``files.checksum_sha256`` becomes ``files.sha256_base64`` and a new
    nullable string column ``files.sha256_hex`` is added.
    """
    files_table = "files"
    hex_column = sa.Column("sha256_hex", sa.String(), nullable=True)

    # Existing checksum values are kept; only the column name changes.
    op.alter_column(files_table, "checksum_sha256", new_column_name="sha256_base64")
    op.add_column(files_table, hex_column)


def downgrade() -> None:
    """Revert the checksum column rename and remove the hex column.

    ``files.sha256_base64`` is renamed back to ``files.checksum_sha256`` and
    ``files.sha256_hex`` is dropped, discarding any values stored in it.
    """
    files_table = "files"

    op.alter_column(files_table, "sha256_base64", new_column_name="checksum_sha256")
    op.drop_column(files_table, "sha256_hex")
3 changes: 2 additions & 1 deletion server/polar/file/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,9 @@ async def mark_uploaded(
if not await authz.can(subject, AccessType.write, organization):
raise NotPermitted()

return await file_service.mark_uploaded(
file = await file_service.mark_uploaded(
session,
organization=organization,
file=file,
)
return FileRead.from_db(file)
59 changes: 43 additions & 16 deletions server/polar/file/schemas.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import Self
from typing import Any, Self

from pydantic import UUID4

Expand All @@ -12,11 +12,17 @@ def get_disposition(file_name: str):
return f'attachment; filename="{file_name}"'


class SHA256Checksums(Schema):
    """SHA-256 checksums of a file in two encodings; either may be None."""

    # SHA-256 digest encoded as base64
    base64: str | None
    # SHA-256 digest encoded as a hexadecimal string
    hex: str | None


class FileCreate(Schema):
    """Payload describing a file that is about to be uploaded."""

    organization_id: UUID4
    name: str
    size: int
    mime_type: str
    # Required field (no default) whose value may be None.
    # NOTE(review): the presigned-upload code reads `sha256.base64` and
    # `sha256.hex` without a None check — confirm whether None should really
    # be accepted here.
    sha256: SHA256Checksums | None
    version: str | None = None


Expand All @@ -29,23 +35,17 @@ class FileRead(Schema):
version: str | None = None
mime_type: str
size: int
sha256: SHA256Checksums | None

status: str

uploaded_at: datetime | None = None
created_at: datetime
modified_at: datetime | None = None


class FilePresignedRead(FileRead):
url: str
url_expires_at: datetime

headers: dict[str, str] = {}

@classmethod
def from_presign(cls, record: File, url: str, expires_at: datetime) -> Self:
return cls(
def prepare_dict_from_db(cls, record: File) -> dict[str, Any]:
return dict(
id=record.id,
organization_id=record.organization_id,
name=record.name,
Expand All @@ -54,17 +54,44 @@ def from_presign(cls, record: File, url: str, expires_at: datetime) -> Self:
mime_type=record.mime_type,
size=record.size,
status=record.status,
url=url,
url_expires_at=expires_at,
sha256=SHA256Checksums(
base64=record.sha256_base64,
hex=record.sha256_hex,
),
uploaded_at=record.uploaded_at,
created_at=record.created_at,
modified_at=record.modified_at,
headers={
"Content-Disposition": get_disposition(record.name),
"Content-Type": record.mime_type,
},
)

@classmethod
def from_db(cls, record: File) -> Self:
    """Build an instance of this schema from a ``File`` database record.

    The field values come from ``prepare_dict_from_db``.
    """
    return cls(**cls.prepare_dict_from_db(record))


class FilePresignedRead(FileRead):
    """``FileRead`` extended with a presigned URL and the headers to send with it."""

    # Presigned URL for the file
    url: str
    # Moment at which `url` stops being valid
    url_expires_at: datetime

    # HTTP headers the client sends along with the request to `url`
    headers: dict[str, str] = {}

    @classmethod
    def from_presign(cls, record: File, url: str, expires_at: datetime) -> Self:
        """Build the schema from a ``File`` record plus its presigned URL.

        Args:
            record: File database record the URL was generated for.
            url: The presigned URL.
            expires_at: Expiration time of ``url``.

        Returns:
            An instance carrying the record's fields, the URL details and the
            request headers (content disposition, content type and checksum
            information).
        """
        headers = {
            "Content-Disposition": get_disposition(record.name),
            "Content-Type": record.mime_type,
        }
        # `sha256_base64` is a nullable column while `headers` only accepts
        # string values: emitting None here would fail schema validation, so
        # the checksum header is sent only when a checksum is stored.
        if record.sha256_base64 is not None:
            headers["x-amz-checksum-sha256"] = record.sha256_base64
        headers["x-amz-sdk-checksum-algorithm"] = "SHA256"

        params = cls.prepare_dict_from_db(record)
        params.update(
            dict(
                url=url,
                url_expires_at=expires_at,
                headers=headers,
            )
        )
        return cls(**params)


class FileUpdate(Schema):
id: UUID4
Expand Down
17 changes: 13 additions & 4 deletions server/polar/file/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import structlog

from polar.config import settings
from polar.exceptions import PolarError, ResourceNotFound
from polar.exceptions import BadRequest, PolarError, ResourceNotFound
from polar.kit.services import ResourceService
from polar.kit.utils import generate_uuid, utc_now
from polar.models import Organization, User
Expand Down Expand Up @@ -64,6 +64,8 @@ async def generate_presigned_upload_url(
Key=key,
ContentDisposition=get_disposition(create_schema.name),
ContentType=create_schema.mime_type,
ChecksumAlgorithm="SHA256",
ChecksumSHA256=create_schema.sha256.base64,
),
ExpiresIn=expires_in,
)
Expand All @@ -76,7 +78,9 @@ async def generate_presigned_upload_url(
presigned_at=utc_now(),
presign_expiration=expires_in,
presign_expires_at=presign_expires_at,
**create_schema.model_dump(),
sha256_base64=create_schema.sha256.base64,
sha256_hex=create_schema.sha256.hex,
**create_schema.model_dump(exclude={"sha256"}),
)
session.add(instance)
await session.flush()
Expand Down Expand Up @@ -132,12 +136,17 @@ async def mark_uploaded(
log.error("aws.s3", file_id=file.id, key=file.key, error="No S3 metadata")
raise FileNotFound(f"No S3 metadata exists for ID: {file.id}")

checksums = metadata.get("Checksums", {})
checksums = metadata.get("Checksum", {})
sha256_base64 = checksums.get("ChecksumSHA256")
if file.sha256_base64 and sha256_base64 != file.sha256_base64:
log.error("aws.s3", file_id=file.id, key=file.key, error="SHA256 missmatch")
raise BadRequest()

file.sha256_base64 = sha256_base64
file.status = FileStatus.uploaded
file.uploaded_at = metadata["LastModified"]
file.etag = metadata.get("ETag")
file.version_id = metadata.get("VersionId")
file.checksum_sha256 = checksums.get("ChecksumSHA256")
# Update size from S3 or fallback on original size given by client
file.size = metadata.get("ObjectSize", file.size)

Expand Down
3 changes: 2 additions & 1 deletion server/polar/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def organization(cls) -> Mapped["Organization"]:
)

version_id: Mapped[str] = mapped_column(String, nullable=True)
checksum_sha256: Mapped[str] = mapped_column(String, nullable=True)
sha256_base64: Mapped[str] = mapped_column(String, nullable=True)
sha256_hex: Mapped[str] = mapped_column(String, nullable=True)
etag: Mapped[str] = mapped_column(String, nullable=True)

@hybrid_property
Expand Down

0 comments on commit 0d4ee3a

Please sign in to comment.