Skip to content

Commit

Permalink
Merge pull request #44 from TeoMeWhy/feat/dota
Browse files Browse the repository at this point in the history
Optimize file readers
  • Loading branch information
TeoCalvo authored Aug 5, 2023
2 parents d0c5c86 + d56cd8b commit 9feaff0
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions src/01.raw/dota/pos_collect.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
# Databricks notebook source
from pyspark.sql import types
import pandas as pd

# COMMAND ----------

# DBTITLE 1,Consolida collect
# NOTE(review): this span appears to be a diff rendering that shows the removed
# and the added lines together; read top to bottom, `df` is assigned twice and
# only the second assignment survives.

# Schema with a single string field, "match_id".
schema = types.StructType(fields=[
types.StructField(name="match_id",
dataType=types.StringType())
])

# Build `df` by loading the JSON data under matches_details with the schema above.
df = (spark.read
.format("json")
.schema(schema)
.load("/mnt/datalake/dota/matches_details"))
# List the entries of the same directory and keep the part of each entry name
# that precedes the first ".".
# NOTE(review): presumably entries are named "<match_id>.json" — confirm.
files = [i.name.split(".")[0] for i in dbutils.fs.ls("/mnt/datalake/dota/matches_details")]

# Rebind `df` to a one-column ("match_id") Spark DataFrame created from a
# pandas DataFrame of the listed names; this replaces the reader-based `df`.
df = spark.createDataFrame(pd.DataFrame({"match_id":files}))

(df.write
.format("delta")
Expand Down

0 comments on commit 9feaff0

Please sign in to comment.