Skip to content

Commit

Permalink
Merge pull request #46 from TeoMeWhy/feat/dota
Browse files (browse the repository at this point in the history)
Remove post ingestion from match_details
  • Loading branch information
TeoCalvo authored Aug 5, 2023
2 parents e2ac19d + 91a7f1c commit 529f838
Showing 1 changed file with 1 addition and 15 deletions.
16 changes: 1 addition & 15 deletions src/01.raw/dota/get_match_details.py
Original file line number | Diff line number | Diff line change
Expand Up
@@ -54,29 +54,15 @@ def get_and_save(self, match_id):
data = resp.json()
self.save_data(data)

def post_ingestion():
df = (spark.read
.format("json")
.schema(self.schema)
.load("/mnt/datalake/dota/matches_details/"))

(df.coalesce(1)
.write
.format("delta")
.mode("overwrite")
.save("/mnt/datalake/dota/collect/"))

def auto_execute(self):
print("Obtendo lista de partidas a serem coletadas...")
ids = self.get_match_list()

print("Iniciando coleta de dados de forma paralela...")
print(f"Iniciando coleta de dados de forma paralela. {len(ids)} partidas...")

with Pool(self.pool_size) as p:
p.map(self.get_and_save, ids)

self.post_ingestion()

# COMMAND ----------

try:
Expand Down

0 comments on commit 529f838

Please sign in to comment.