-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
3,063 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"presets": [ | ||
[ | ||
"@babel/preset-env", | ||
{ | ||
// "loose": true, | ||
// "debug": true, | ||
"targets": { | ||
"node": "current" | ||
} | ||
} | ||
], | ||
"@babel/preset-typescript" | ||
], | ||
"plugins": [ | ||
"@babel/plugin-syntax-dynamic-import" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,3 +96,6 @@ public | |
|
||
# DynamoDB Local files | ||
.dynamodb/ | ||
|
||
# Project specific | ||
dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
* | ||
!dist/**/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2019 Christian Battaglia | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,23 @@ | ||
{ | ||
"name": "@dtw/files", | ||
"version": "1.0.0", | ||
"description": "", | ||
"main": "index.js", | ||
"description": "nodejs file streams, buffers, I/O and back again container class", | ||
"main": "dist/index.js", | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
"build": "babel --out-dir dist --extensions \".ts,.js\" --copy-files --source-maps inline src", | ||
"prepublish": "npm run build" | ||
}, | ||
"author": "", | ||
"license": "ISC", | ||
"author": "Christian Battaglia <[email protected]> (https://cdbattaglia.com)", | ||
"license": "MIT", | ||
"dependencies": { | ||
"@babel/cli": "^7.6.2", | ||
"@babel/core": "^7.6.2", | ||
"@babel/plugin-syntax-dynamic-import": "^7.2.0", | ||
"@babel/preset-env": "^7.6.2", | ||
"@babel/preset-typescript": "^7.6.0", | ||
"@types/mkdirp": "^0.5.2", | ||
"@types/node": "^12.7.8", | ||
"@types/rimraf": "^2.0.2", | ||
"mkdirp": "^0.5.1", | ||
"rimraf": "^3.0.0", | ||
"uuid": "^3.3.3" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2019 Christian Battaglia | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# @dtw/files | ||
|
||
## inspiration | ||
|
||
Working at Amplify (https://amplify.com) for the last year and a half I've been semi pushing forward the concept of isomorphic JS and how, with a little bit of creative engineering, we can build tools that can be placed on the frontend or the backend and moved between as we see fit. | ||
|
||
NodeJS microservices are a bit of a greenfield exercise for us in that we have engineers with experience but we don't necessarily have the libs and the tooling. My previous work has been a lot of Express when it comes to NodeJS for simplicity. It finally came to building tools that we want to fit inside our publishing pipeline and we wanted the concept of Transform Streams for maximum throughput (concurrency). | ||
|
||
In my head, I default to thinking of files and binary data needing transformations as a stream. This comes from a little bit of background information implementing an S3 proxy of sorts using Dropwizard. We wanted to hit a Dropwizard (DW) endpoint with a set of path/query params and maybe some auth and then open a request to S3 using: | ||
|
||
```xml | ||
<dependency> | ||
<groupId>com.amazonaws</groupId> | ||
<artifactId>aws-java-sdk</artifactId> | ||
<version>1.11.126</version> | ||
</dependency> | ||
``` | ||
|
||
and then we wanted to pass that response through to the response from the original client. I quickly found this to be extremely unproductive. I'll go back and look up the code when I get a chance and create an example repository of what exactly I found. More or less, we wanted to serve PDFs of size 100MB+ and so obviously there needed to be some response/request stream "chunking" in order to do it optimally and the overhead around the Jersey "InputStream" class wasn't quite working for my needs and was terribly slow. | ||
|
||
Fast forwarding ahead a bit, I got into some discussions with my friend [Adnane Ouahabi](https://www.linkedin.com/in/ouahabi/?originalSubdomain=es) at [Glovo](https://www.linkedin.com/company/glovo-app/) talking about event based architecture for all the Glovo services backed by Java based microservices, [Apache Avro](https://avro.apache.org/) and [Amazon Kinesis](https://aws.amazon.com/kinesis/). More or less the discussion was centered around leaving the least amount of information in buffer/memory as possible. When it comes to the static/publishing content of Amplify (think books) I felt that this would be an amazing optimization if we could get it done right. Think about more S3 (cloud storage) and less networking. This means that we could build a pipeline of transforms and pipe data to said microservices with a destination location (maybe a bucket) and do all our publishing in one go. This would also centralize our storage space. | ||
|
||
inputs -> streams -> transforms -> responses -> storage | ||
|
||
 | ||
|
||
In the same vein, we'd have a bipartite graph of inputs and outputs with a graph network of the journey in between. For example, any big publisher or magazine company or CMS system would have very similar context. | ||
|
||
1. author/journalist (any sort of writer) writes content in a system | ||
2. system automatically exports to print and web | ||
3. both print and web have similar contexts so store in the same location to keep track of underlying derivative | ||
|
||
This concept of Kinesis is where this repo comes in. We have PDFs we're downloading from Google Drive slides that we want to run through [Ghostscript](https://www.ghostscript.com/) for optimization purposes (reduce size and compress binary bundled information) and then we want to be able to split the PDF by page and upload all these new pdfs to more or less the same location. | ||
|
||
Google Drive slides -> pdf export -> ghostscript -> hummus -> S3 | ||
|
||
Well, at the same time, we want to "cache" the PDF by an internal version number (https://developers.google.com/drive/api/v3/reference/files#resource) from the export such that we have the raw PDF we can always go back and re-transform when we need to. | ||
|
||
My brain immediately goes to optimizations because that's why I love my computer science job and that means that I want to be able to task-ify as many iterations of this pipe as possible as an async style pipeline. | ||
|
||
cache === stream passthrough/forward response as received to S3 | ||
pdf transform === stream -> I/O -> ghostscript -> I/O -> stream to S3 | ||
|
||
Then my mind immediately went to one other location because there's one other use case. What if in the microservice we want to process the file as the whole buffer or chunks of the stream aggregated? Then we'd want a stream or a buffer or direct I/O and then we'd want to interchange between all of those as needed. | ||
|
||
## dependencies | ||
|
||
[Ghostscript](https://www.ghostscript.com/) | ||
- https://formulae.brew.sh/formula/ghostscript#default | ||
- main thing is to alias it to `gs` in your `$PATH` or else the PDF opt will error out |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"name": "@dtw/files", | ||
"version": "1.0.0", | ||
"description": "nodejs file streams, buffers, I/O and back again container class", | ||
"main": "dist/index.js", | ||
"scripts": { | ||
"build": "babel --out-dir dist --extensions \".ts,.js\" --copy-files --source-maps inline src", | ||
"prepublish": "npm run build" | ||
}, | ||
"author": "Christian Battaglia <[email protected]> (https://cdbattaglia.com)", | ||
"license": "MIT", | ||
"dependencies": { | ||
"@babel/cli": "^7.6.2", | ||
"@babel/core": "^7.6.2", | ||
"@babel/plugin-syntax-dynamic-import": "^7.2.0", | ||
"@babel/preset-env": "^7.6.2", | ||
"@babel/preset-typescript": "^7.6.0", | ||
"@types/mkdirp": "^0.5.2", | ||
"@types/node": "^12.7.8", | ||
"@types/rimraf": "^2.0.2", | ||
"mkdirp": "^0.5.1", | ||
"rimraf": "^3.0.0", | ||
"uuid": "^3.3.3" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export { default as DTWFile } from './file' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"compilerOptions": { | ||
// Target latest version of ECMAScript. | ||
"target": "esnext", | ||
// Search under node_modules for non-relative imports. | ||
"moduleResolution": "node", | ||
// Process & infer types from .js files. | ||
"allowJs": true, | ||
// Don't emit; allow Babel to transform files. | ||
"noEmit": true, | ||
// Enable strictest settings like strictNullChecks & noImplicitAny. | ||
"strict": true, | ||
// Disallow features that require cross-file information for emit. | ||
"isolatedModules": true, | ||
// Import non-ES modules as default imports. | ||
"esModuleInterop": true, | ||
// Resolve JSON | ||
"resolveJsonModule": true, | ||
// Others | ||
"sourceMap": true, | ||
"typeRoots": [ | ||
"node_modules/@types" | ||
] | ||
}, | ||
"include": [ | ||
"src" | ||
] | ||
} |