From 7683d65dbc32f3730bd7363c757f65a8a3d4b57e Mon Sep 17 00:00:00 2001 From: riccardo-alle Date: Fri, 1 Mar 2024 11:13:55 +0000 Subject: [PATCH] deploy: 1c3148cc926b8a83774e45421e6d6f90e572c32f --- 404.html | 4 ++-- 404/index.html | 4 ++-- [year]/[month]/[slug]/index.html | 2 +- .../index.json | 2 +- .../_buildManifest.js | 0 .../_ssgManifest.js | 0 authors/index.html | 2 +- blog/index.html | 2 +- events/index.html | 2 +- index.html | 8 ++++---- jobs/index.html | 2 +- open-source/index.html | 2 +- podcast/[slug]/index.html | 2 +- podcast/index.html | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) rename _next/data/{OLpqX5JFw5NMxyQBdzhDl => ahszDPSeH3NfSLZyM4Kj9}/index.json (65%) rename _next/static/{OLpqX5JFw5NMxyQBdzhDl => ahszDPSeH3NfSLZyM4Kj9}/_buildManifest.js (100%) rename _next/static/{OLpqX5JFw5NMxyQBdzhDl => ahszDPSeH3NfSLZyM4Kj9}/_ssgManifest.js (100%) diff --git a/404.html b/404.html index 458b484..15df453 100644 --- a/404.html +++ b/404.html @@ -1,4 +1,4 @@ -404: This page could not be found

404

This page could not be found.

\ No newline at end of file + }

404

This page could not be found.

\ No newline at end of file diff --git a/404/index.html b/404/index.html index 458b484..15df453 100644 --- a/404/index.html +++ b/404/index.html @@ -1,4 +1,4 @@ -404: This page could not be found

404

This page could not be found.

\ No newline at end of file + }

404

This page could not be found.

\ No newline at end of file diff --git a/[year]/[month]/[slug]/index.html b/[year]/[month]/[slug]/index.html index f8782e0..9cff2f8 100644 --- a/[year]/[month]/[slug]/index.html +++ b/[year]/[month]/[slug]/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file diff --git a/_next/data/OLpqX5JFw5NMxyQBdzhDl/index.json b/_next/data/ahszDPSeH3NfSLZyM4Kj9/index.json similarity index 65% rename from _next/data/OLpqX5JFw5NMxyQBdzhDl/index.json rename to _next/data/ahszDPSeH3NfSLZyM4Kj9/index.json index d8375e2..ca69d48 100644 --- a/_next/data/OLpqX5JFw5NMxyQBdzhDl/index.json +++ b/_next/data/ahszDPSeH3NfSLZyM4Kj9/index.json @@ -1 +1 @@ -{"pageProps":{"posts":[{"title":"Trust no one, not even your training data! Machine learning from noisy data","link":"https://blog.allegro.tech/2023/04/learning-from-noisy-data.html","pubDate":"Tue, 18 Apr 2023 00:00:00 +0200","authors":{"author":[{"name":["Alicja Rączkowska"],"photo":["https://blog.allegro.tech/img/authors/alicja.raczkowska.jpg"],"url":["https://blog.allegro.tech/authors/alicja.raczkowska"]},{"name":["Aleksandra Osowska-Kurczab"],"photo":["https://blog.allegro.tech/img/authors/aleksandra.osowska-kurczab.jpg"],"url":["https://blog.allegro.tech/authors/aleksandra.osowska-kurczab"]},{"name":["Jacek Szczerbiński"],"photo":["https://blog.allegro.tech/img/authors/jacek.szczerbinski.jpg"],"url":["https://blog.allegro.tech/authors/jacek.szczerbinski"]},{"name":["Klaudia Nazarko"],"photo":["https://blog.allegro.tech/img/authors/klaudia.nazarko.jpg"],"url":["https://blog.allegro.tech/authors/klaudia.nazarko"]},{"name":["Kalina Kobus"],"photo":["https://blog.allegro.tech/img/authors/kalina.kobus.jpg"],"url":["https://blog.allegro.tech/authors/kalina.kobus"]}]},"content":null,"contentSnippet":"Label noise is ever-present in machine learning practice.\nAllegro datasets are no exception.\nWe compared 7 methods for training classifiers robust to label noise.\nAll of them improved the model’s perf","guid":"https://blog.allegro.tech/2023/04/learning-from-noisy-data.html","categories":["tech","mlr","robustness","research","ml","machine-learning","ai"],"isoDate":"2023-04-17T22:00:00.000Z","thumbnail":"images/post-headers/mlr.png"},{"title":"Turn-Based Offline Reinforcement Learning","link":"https://blog.allegro.tech/2022/04/turn-based-offline-rl.html","pubDate":"Thu, 14 Apr 2022 00:00:00 +0200","authors":{"author":[{"name":["Riccardo Belluzzo"],"photo":["https://blog.allegro.tech/img/authors/riccardo.belluzzo.jpg"],"url":["https://blog.allegro.tech/authors/riccardo.belluzzo"]},{"name":["Tomasz Bocheński"],"photo":["https://blog.allegro.tech/img/authors/tomasz.bochenski.jpg"],"url":["https://blog.allegro.tech/authors/tomasz.bochenski"]},{"name":["Michał Zając"],"photo":["https://blog.allegro.tech/img/authors/michal.zajac.jpg"],"url":["https://blog.allegro.tech/authors/michal.zajac"]},{"name":["Łukasz Kuciński"],"photo":["https://blog.allegro.tech/img/authors/lukasz.kucinski.jpg"],"url":["https://blog.allegro.tech/authors/lukasz.kucinski"]},{"name":["Piotr Miłoś"],"photo":["https://blog.allegro.tech/img/authors/piotr.milos.jpg"],"url":["https://blog.allegro.tech/authors/piotr.milos"]}]},"content":null,"contentSnippet":"This blogpost is the result of a research collaboration between the Allegro Machine Learning Research team and\nthe Institute of Mathematics of the Polish Academy of Sciences (IMPAN), Warsaw.\nIntroduct","guid":"https://blog.allegro.tech/2022/04/turn-based-offline-rl.html","categories":["tech","mlr","rl","research"],"isoDate":"2022-04-13T22:00:00.000Z","thumbnail":"images/post-headers/mlr.png"}],"jobs":[{"id":"743999966506533","name":"Mid/Senior Software Engineer (Machine 
Learning)","uuid":"156f612b-e9ac-4e19-b7d1-09b6e7ee4798","jobAdId":"9911834a-bde3-46e3-8315-234d5370df16","defaultJobAd":true,"refNumber":"REF4657Q","company":{"identifier":"Allegro","name":"Allegro"},"releasedDate":"2024-02-08T17:22:34.877Z","location":{"city":"Warsaw, Poznań","country":"pl","remote":false},"industry":{"id":"internet","label":"Internet"},"department":{"id":"2572821","label":"IT - Machine Learning"},"function":{"id":"information_technology","label":"Information Technology"},"typeOfEmployment":{"id":"permanent","label":"Full-time"},"experienceLevel":{"id":"mid_senior_level","label":"Mid-Senior Level"},"customField":[{"fieldId":"61583054f15cea434e0be36f","fieldLabel":"Career Level","valueId":"3976147c-fe25-42a8-8c97-78273250960b","valueLabel":"4"},{"fieldId":"61582f70e72a6b6d239c9857","fieldLabel":"Area","valueId":"76599a72-f283-4550-9303-52e2e0eb6e32","valueLabel":"Technology"},{"fieldId":"58c15608e4b01d4b19ddf790","fieldLabel":"Recruitment Process","valueId":"ed4682c7-33c9-41c2-8d13-428ed39046f5","valueLabel":"Tech. Engineer - IC"},{"fieldId":"6406f92e638cbb2f415a94a9","fieldLabel":"Job Area","valueId":"e8731ea4-48a9-476d-ab1d-9a40eb3426f1","valueLabel":"Technology"},{"fieldId":"COUNTRY","fieldLabel":"Country","valueId":"pl","valueLabel":"Poland"},{"fieldId":"58c13159e4b01d4b19ddf729","fieldLabel":"Department","valueId":"2572821","valueLabel":"IT - Machine Learning"},{"fieldId":"58c13159e4b01d4b19ddf728","fieldLabel":"Brands","valueId":"4ccb4fab-6c3f-4ed0-9140-8533fe17447f","valueLabel":"Allegro sp. z o.o."}],"visibility":"PUBLIC","ref":"https://api.smartrecruiters.com/v1/companies/allegro/postings/743999966506533","creator":{"name":"Paulina Siwek"},"language":{"code":"en","label":"English","labelNative":"English (US)"}},{"id":"743999962282954","name":"Senior Data Analyst - Allegro Pay (Machine Learning and Analytics)","uuid":"b8156379-53ec-4b73-93a1-ea9038940d32","jobAdId":"b5af9bf2-cca7-4e08-8164-e9a911a1aa35","defaultJobAd":false,"refNumber":"REF4712M","company":{"identifier":"Allegro","name":"Allegro"},"releasedDate":"2024-01-23T10:24:40.400Z","location":{"city":"Warsaw","country":"pl","remote":false},"industry":{"id":"internet","label":"Internet"},"department":{"id":"5334488","label":"Financial Services (Allegro Pay)"},"function":{"id":"information_technology","label":"Information Technology"},"typeOfEmployment":{"id":"permanent","label":"Full-time"},"experienceLevel":{"id":"mid_senior_level","label":"Mid-Senior Level"},"customField":[{"fieldId":"58c15608e4b01d4b19ddf790","fieldLabel":"Recruitment Process","valueId":"ed4682c7-33c9-41c2-8d13-428ed39046f5","valueLabel":"Tech. Engineer - IC"},{"fieldId":"6406f92e638cbb2f415a94a9","fieldLabel":"Job Area","valueId":"e8731ea4-48a9-476d-ab1d-9a40eb3426f1","valueLabel":"Technology"},{"fieldId":"COUNTRY","fieldLabel":"Country","valueId":"pl","valueLabel":"Poland"},{"fieldId":"61583054f15cea434e0be36f","fieldLabel":"Career Level","valueId":"26b58095-3c5f-4596-937f-27547fb80b07","valueLabel":"5"},{"fieldId":"58c13159e4b01d4b19ddf729","fieldLabel":"Department","valueId":"5334488","valueLabel":"Financial Services (Allegro Pay)"},{"fieldId":"58c13159e4b01d4b19ddf728","fieldLabel":"Brands","valueId":"9c8396d4-11a6-443c-897c-15f29221a3fd","valueLabel":"Allegro Pay sp. 
z o.o."},{"fieldId":"61582f70e72a6b6d239c9857","fieldLabel":"Area","valueId":"76599a72-f283-4550-9303-52e2e0eb6e32","valueLabel":"Technology"}],"visibility":"PUBLIC","ref":"https://api.smartrecruiters.com/v1/companies/allegro/postings/743999962282954","language":{"code":"en","label":"English","labelNative":"English (US)"}}],"papers":[{"authors":"Aleksandra Chrabrowa, Tsimur Hadeliya, Dariusz Kajtoch, Robert Mroczkowski, Piotr Rybak","date":"2023","paper_url":"https://aclanthology.org/2023.findings-eacl.68/","accepted_at":"Findings of the Association for Computational Linguistics: EACL 2023","paper_title":"Going beyond research datasets: Novel intent discovery in the industry setting"},{"authors":"Aleksandra Chrabrowa, Łukasz Dragan, Karol Grzegorczyk, Dariusz Kajtoch, Mikołaj Koszowski, Robert Mroczkowski, Piotr Rybak","date":"2022","paper_url":"http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.466.pdf","accepted_at":"Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022)","paper_title":"Evaluation of Transfer Learning for Polish with a Text-to-Text Model"},{"authors":"Mikołaj Koszowski, Karol Grzegorczyk, Tsimur Hadeliya","date":"2021","paper_url":"https://aclanthology.org/2021.wmt-1.10/","accepted_at":"Proceedings of the Sixth Conference on Machine Translation","paper_title":"Allegro.eu Submission to WMT21 News Translation Task"},{"authors":"Robert Mroczkowski, Piotr Rybak, Alina Wróblewska, Ireneusz Gawlik","date":"2021","paper_url":"https://www.aclweb.org/anthology/2021.bsnlp-1.1/","accepted_at":"BSNLP, accepted long paper","paper_title":"HerBERT: Efficiently Pretrained Transformer-based Language Model for Polish"},{"authors":"Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik","date":"2020","paper_url":"https://www.aclweb.org/anthology/2020.acl-main.111/","accepted_at":"ACL 2020, accepted long paper","paper_title":"KLEJ: Comprehensive Benchmark for Polish Language Understanding"},{"authors":"Przemysław Pobrotyn, Tomasz Bartczak, Mikołaj Synowiec, Radosław Białobrzeski, Jarosław Bojar","date":"2020","paper_url":"https://arxiv.org/abs/2005.10084","accepted_at":"SIGIR eCommerce Workshop 2020, contributed talk","paper_title":"Context-Aware Learning to Rank with Self-Attention"},{"authors":"Przemysław Pobrotyn, Radosław Białobrzeski","date":"2020","paper_url":"https://arxiv.org/abs/2102.07831","accepted_at":"The 2021 SIGIR Workshop On eCommerce (SIGIR eCom ’21)","paper_title":"NeuralNDCG: Direct Optimisation of a Ranking Metric via Differentiable Relaxation of Sorting"},{"authors":"Janusz Tracz, Piotr Wójcik, Kalina Jasinska-Kobus, Riccardo Belluzzo, Robert Mroczkowski, Ireneusz Gawlik","date":"2020","paper_url":"https://www.aclweb.org/anthology/2020.ecomnlp-1.7/","accepted_at":"EComNLP 2020 COLING Workshop on Natural Language Processing in E-Commerce","paper_title":"BERT-based similarity learning for product matching"}],"videos":[{"title":"The structure of customer service data @Allegro","url":"https://www.youtube.com/watch?v=jK-NX_qufjI","who":"Aleksandra Chrabrowa","description":"GHOST Day: AMLC 2022","thumb":"images/video-headers/ola-ghost22.png"},{"title":"Retrieval at Scale","url":"https://www.youtube.com/watch?v=fP0e3nuUyRY","who":"Aleksandra Osowska-Kurczab & Jacek Szczerbiński","description":"Sponsored talk by Allegro for ML in PL Conference 2022","thumb":"images/video-headers/ola-jacek-mlinpl-2022.png"},{"title":"Use Your Data Wisely – Data-Centric NLP in the E-commerce 
Domain","url":"https://www.youtube.com/watch?v=zEgU0mIKiVA&t","who":"Paweł Olszewski","description":"Seminar at Warsaw.ai - Episode XV - 2.06.2022","thumb":"images/video-headers/pawel-warsawai.png"},{"title":"Evaluation of Transfer Learning for Polish with a Text-to-Text Model","url":"https://s3.eu-west-2.wasabisys.com/lrec2022/sessions/575.mp4","who":"Dariusz Kajtoch","description":"Paper presentation at LREC 2022","thumb":"images/video-headers/darek-lrec2022.png"}],"videos2":[{"title":"Introduction to Offline Reinforcement Learning and its applications","url":"https://www.youtube.com/watch?v=zF8TcTgcmRM","who":"Riccardo Belluzzo","description":"Seminar at the University of Padua","thumb":"images/video-headers/riccardo-padua.png"},{"title":"plT5: Universal Model For Polish Language","url":"https://www.youtube.com/watch?v=BJYL7QZD6z4","who":"Dariusz Kajtoch","description":"ML in PL 2021","thumb":"images/video-headers/darek-plt5.png"},{"title":"How to translate Allegro into foreign languages","url":"https://www.youtube.com/watch?v=6KDU5TZohpM&list=PLzveSKBX_3N7yPb4ErB5HJ83eB6XvH37C&index=27","who":"Karol Grzegorczyk","description":"ATM 2021 public track presentation","thumb":"images/video-headers/karol-atm.png"},{"title":"“Do you speak Allegro?” Large Scale Language Modeling","url":"https://www.youtube.com/watch?v=6T-R4kgIbBs&list=PLzveSKBX_3N7yPb4ErB5HJ83eB6XvH37C&index=19","who":"Riccardo Belluzzo","description":"ATM 2021 public track presentation","thumb":"images/video-headers/riccardo-atm.png"}],"open_source":[{"name":"Hugging Face Allegro","url":"https://huggingface.co/allegro","description":"We contribute to the NLP community by publishing models and datasets to the Hugging Face Hub!\n"},{"name":"llm-wrapper","url":"https://github.com/allegro/llm-wrapper","description":"Versatile and powerful library designed to streamline the process of querying large language models.\n* Simple and User-Friendly Interface,\n* Asynchronous Querying,\n* Automatic Retrying Mechanism,\n* Error Handling and Management,\n* Output Parsing"},{"name":"allRank","url":"https://github.com/allegro/allRank","description":"Framework for training neural Learning-to-Rank (LTR) models,\nfeaturing implementations of:\n* common pointwise, pairwise and listwise loss function,\n* fully connected and Transformer-like scoring function,\n* commonly used evaluation metrics like Normalized Discounted Cumulative Gain (NDCG) and Mean Reciprocal Rank (MRR},\n* click-models for experiments on simulated click-through data"},{"name":"KLEJ Benchmark","url":"https://klejbenchmark.com/","icon":"FaTint","description":"The KLEJ benchmark (Kompleksowa Lista Ewaluacji Językowych) is a set of nine evaluation tasks for the Polish language understanding. Key benchmark features:\n* It contains a diverse set of tasks from different domains and with different objectives,\n* Most tasks are created from existing datasets but we also release the new sentiment analysis dataset from an e-commerce domain."},{"name":"HerBERT","url":"https://huggingface.co/allegro/herbert-large-cased","description":"HerBERT is a BERT-based language model trained on six different corpora for Polish language understanding. It achieves state-of-the-art results on multiple downstream tasks, including [KLEJ Benchmark](https://klejbenchmark.com/) and Part-of-Speech tagging. 
We release both Base and Large variants of the model as a part of [transformers](https://github.com/huggingface/transformers) library for anyone to use."}],"teams":[{"name":"Machine Translation","icon":"FaRobot","description":"We are developing an in-house Machine Translation engine specifically for e-commerce purposes, aiming to provide better value compared to off-the-shelf solutions. Our focus is on accurately translating industry-specific terms and jargon, while also creating a scalable and cost-efficient solution. We employ state-of-the-art machine learning methods, involving human evaluators and automatic quality estimation models to continually enhance translation quality. Our goal is to make our platform accessible to non-Polish speakers globally and contribute to the machine translation community."},{"name":"Language Modeling","icon":"FaSitemap","description":"We employ state-of-the-art deep learning models and a range of NLP algorithms to solve diverse problems that require semantic understanding of the specialized language used within a unique environment of an e-commerce platform. We utilize and develop Large Language Models (LLMs), with the goal of providing the company with general purpose Foundation Models that can be tailored for specific downstream tasks. On a daily basis, we use our models in the following applications: Semantic Search, Question Answering, Conversational AI, Generative AI, Named Entity Recognition."},{"name":"Learning to Rank","icon":"FaList","description":"In Learning to Rank our goal is to develop machine learning models for search. Our main focus is on ranking solutions in all phases of the search pipeline, serving millions of searches a day. Currently our main area of expertise is neural text-based search and relevance. We’re also interested in topics such as reranking, feature interaction architectures, and personalization."},{"name":"Computer Vision","icon":"FaImages","description":"At MLR Computer Vision, our primary objective is to elevate the user experience by leveraging machine learning image processing algorithms. We specifically concentrate on image representation learning for Visual Search and the development of robust image classification models. Presently, our research is focused on the integration of multiple modalities into our models. This integration enables our models to process not only images but also harness diverse sources of information such as product titles, descriptions, and attributes. The implementation of these multimodal models holds significant potential in various domains, including semantic search and the enhancement of product catalog quality. By employing such models, we aim to deliver superior solutions in these areas, ultimately providing enhanced user experiences."},{"name":"Recommendations","icon":"FaShoppingBag","description":"Our team's primary objective is to fulfill users' needs by providing them with a diverse range of products that align with their interests. We strive to inspire users and connect them with relevant offers by leveraging recommender systems. To achieve this, we rely on the collective behaviors of our user-base, forming the foundation of our algorithms. However, we also incorporate content features of the items into our models, enriching recommendations with exploratory algorithms. These algorithms not only utilize historical data but also actively engage with the world, enabling us to explore new possibilities. 
Our major challenges revolve around developing innovative algorithms that can deliver high-quality recommendations while effectively handling Allegro's significant daily traffic. This ambitious endeavor requires us to operate at scale, ensuring seamless user experiences across the platform."},{"name":"ML Ops","icon":"FaSignal","description":"The MLOps team aims to optimize, scale, and deploy advanced machine learning models. We blend artificial intelligence, software engineering, and DevOps expertise to embrace the full potential of research engineers and data scientists from other teams. We orchestrate the entire machine learning lifecycle, from data preprocessing and annotation to model deployment, using the cutting-edge infrastructure of Google Cloud and Kubernetes. We're operating at a massive scale with several terabytes of data processed daily and thousands of predictions per second. "}]},"__N_SSG":true} \ No newline at end of file +{"pageProps":{"posts":[{"title":"Trust no one, not even your training data! Machine learning from noisy data","link":"https://blog.allegro.tech/2023/04/learning-from-noisy-data.html","pubDate":"Tue, 18 Apr 2023 00:00:00 +0200","authors":{"author":[{"name":["Alicja Rączkowska"],"photo":["https://blog.allegro.tech/img/authors/alicja.raczkowska.jpg"],"url":["https://blog.allegro.tech/authors/alicja.raczkowska"]},{"name":["Aleksandra Osowska-Kurczab"],"photo":["https://blog.allegro.tech/img/authors/aleksandra.osowska-kurczab.jpg"],"url":["https://blog.allegro.tech/authors/aleksandra.osowska-kurczab"]},{"name":["Jacek Szczerbiński"],"photo":["https://blog.allegro.tech/img/authors/jacek.szczerbinski.jpg"],"url":["https://blog.allegro.tech/authors/jacek.szczerbinski"]},{"name":["Klaudia Nazarko"],"photo":["https://blog.allegro.tech/img/authors/klaudia.nazarko.jpg"],"url":["https://blog.allegro.tech/authors/klaudia.nazarko"]},{"name":["Kalina Kobus"],"photo":["https://blog.allegro.tech/img/authors/kalina.kobus.jpg"],"url":["https://blog.allegro.tech/authors/kalina.kobus"]}]},"content":null,"contentSnippet":"Label noise is ever-present in machine learning practice.\nAllegro datasets are no exception.\nWe compared 7 methods for training classifiers robust to label noise.\nAll of them improved the model’s perf","guid":"https://blog.allegro.tech/2023/04/learning-from-noisy-data.html","categories":["tech","mlr","robustness","research","ml","machine-learning","ai"],"isoDate":"2023-04-17T22:00:00.000Z","thumbnail":"images/post-headers/mlr.png"},{"title":"Turn-Based Offline Reinforcement Learning","link":"https://blog.allegro.tech/2022/04/turn-based-offline-rl.html","pubDate":"Thu, 14 Apr 2022 00:00:00 +0200","authors":{"author":[{"name":["Riccardo Belluzzo"],"photo":["https://blog.allegro.tech/img/authors/riccardo.belluzzo.jpg"],"url":["https://blog.allegro.tech/authors/riccardo.belluzzo"]},{"name":["Tomasz Bocheński"],"photo":["https://blog.allegro.tech/img/authors/tomasz.bochenski.jpg"],"url":["https://blog.allegro.tech/authors/tomasz.bochenski"]},{"name":["Michał Zając"],"photo":["https://blog.allegro.tech/img/authors/michal.zajac.jpg"],"url":["https://blog.allegro.tech/authors/michal.zajac"]},{"name":["Łukasz Kuciński"],"photo":["https://blog.allegro.tech/img/authors/lukasz.kucinski.jpg"],"url":["https://blog.allegro.tech/authors/lukasz.kucinski"]},{"name":["Piotr Miłoś"],"photo":["https://blog.allegro.tech/img/authors/piotr.milos.jpg"],"url":["https://blog.allegro.tech/authors/piotr.milos"]}]},"content":null,"contentSnippet":"This blogpost is the result of a 
research collaboration between the Allegro Machine Learning Research team and\nthe Institute of Mathematics of the Polish Academy of Sciences (IMPAN), Warsaw.\nIntroduct","guid":"https://blog.allegro.tech/2022/04/turn-based-offline-rl.html","categories":["tech","mlr","rl","research"],"isoDate":"2022-04-13T22:00:00.000Z","thumbnail":"images/post-headers/mlr.png"}],"jobs":[{"id":"743999966506533","name":"Mid/Senior Software Engineer (Machine Learning)","uuid":"156f612b-e9ac-4e19-b7d1-09b6e7ee4798","jobAdId":"9911834a-bde3-46e3-8315-234d5370df16","defaultJobAd":true,"refNumber":"REF4657Q","company":{"identifier":"Allegro","name":"Allegro"},"releasedDate":"2024-02-08T17:22:34.877Z","location":{"city":"Warsaw, Poznań","country":"pl","remote":false},"industry":{"id":"internet","label":"Internet"},"department":{"id":"2572821","label":"IT - Machine Learning"},"function":{"id":"information_technology","label":"Information Technology"},"typeOfEmployment":{"id":"permanent","label":"Full-time"},"experienceLevel":{"id":"mid_senior_level","label":"Mid-Senior Level"},"customField":[{"fieldId":"61583054f15cea434e0be36f","fieldLabel":"Career Level","valueId":"3976147c-fe25-42a8-8c97-78273250960b","valueLabel":"4"},{"fieldId":"61582f70e72a6b6d239c9857","fieldLabel":"Area","valueId":"76599a72-f283-4550-9303-52e2e0eb6e32","valueLabel":"Technology"},{"fieldId":"58c15608e4b01d4b19ddf790","fieldLabel":"Recruitment Process","valueId":"ed4682c7-33c9-41c2-8d13-428ed39046f5","valueLabel":"Tech. Engineer - IC"},{"fieldId":"6406f92e638cbb2f415a94a9","fieldLabel":"Job Area","valueId":"e8731ea4-48a9-476d-ab1d-9a40eb3426f1","valueLabel":"Technology"},{"fieldId":"COUNTRY","fieldLabel":"Country","valueId":"pl","valueLabel":"Poland"},{"fieldId":"58c13159e4b01d4b19ddf729","fieldLabel":"Department","valueId":"2572821","valueLabel":"IT - Machine Learning"},{"fieldId":"58c13159e4b01d4b19ddf728","fieldLabel":"Brands","valueId":"4ccb4fab-6c3f-4ed0-9140-8533fe17447f","valueLabel":"Allegro sp. z o.o."}],"visibility":"PUBLIC","ref":"https://api.smartrecruiters.com/v1/companies/allegro/postings/743999966506533","creator":{"name":"Paulina Siwek"},"language":{"code":"en","label":"English","labelNative":"English (US)"}},{"id":"743999962282954","name":"Senior Data Analyst - Allegro Pay (Machine Learning and Analytics)","uuid":"b8156379-53ec-4b73-93a1-ea9038940d32","jobAdId":"b5af9bf2-cca7-4e08-8164-e9a911a1aa35","defaultJobAd":false,"refNumber":"REF4712M","company":{"identifier":"Allegro","name":"Allegro"},"releasedDate":"2024-01-23T10:24:40.400Z","location":{"city":"Warsaw","country":"pl","remote":false},"industry":{"id":"internet","label":"Internet"},"department":{"id":"5334488","label":"Financial Services (Allegro Pay)"},"function":{"id":"information_technology","label":"Information Technology"},"typeOfEmployment":{"id":"permanent","label":"Full-time"},"experienceLevel":{"id":"mid_senior_level","label":"Mid-Senior Level"},"customField":[{"fieldId":"58c15608e4b01d4b19ddf790","fieldLabel":"Recruitment Process","valueId":"ed4682c7-33c9-41c2-8d13-428ed39046f5","valueLabel":"Tech. 
Engineer - IC"},{"fieldId":"6406f92e638cbb2f415a94a9","fieldLabel":"Job Area","valueId":"e8731ea4-48a9-476d-ab1d-9a40eb3426f1","valueLabel":"Technology"},{"fieldId":"COUNTRY","fieldLabel":"Country","valueId":"pl","valueLabel":"Poland"},{"fieldId":"61583054f15cea434e0be36f","fieldLabel":"Career Level","valueId":"26b58095-3c5f-4596-937f-27547fb80b07","valueLabel":"5"},{"fieldId":"58c13159e4b01d4b19ddf729","fieldLabel":"Department","valueId":"5334488","valueLabel":"Financial Services (Allegro Pay)"},{"fieldId":"58c13159e4b01d4b19ddf728","fieldLabel":"Brands","valueId":"9c8396d4-11a6-443c-897c-15f29221a3fd","valueLabel":"Allegro Pay sp. z o.o."},{"fieldId":"61582f70e72a6b6d239c9857","fieldLabel":"Area","valueId":"76599a72-f283-4550-9303-52e2e0eb6e32","valueLabel":"Technology"}],"visibility":"PUBLIC","ref":"https://api.smartrecruiters.com/v1/companies/allegro/postings/743999962282954","language":{"code":"en","label":"English","labelNative":"English (US)"}}],"papers":[{"authors":"Aleksandra Chrabrowa, Tsimur Hadeliya, Dariusz Kajtoch, Robert Mroczkowski, Piotr Rybak","date":"2023","paper_url":"https://aclanthology.org/2023.findings-eacl.68/","accepted_at":"Findings of the Association for Computational Linguistics: EACL 2023","paper_title":"Going beyond research datasets: Novel intent discovery in the industry setting"},{"authors":"Aleksandra Chrabrowa, Łukasz Dragan, Karol Grzegorczyk, Dariusz Kajtoch, Mikołaj Koszowski, Robert Mroczkowski, Piotr Rybak","date":"2022","paper_url":"http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.466.pdf","accepted_at":"Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022)","paper_title":"Evaluation of Transfer Learning for Polish with a Text-to-Text Model"},{"authors":"Mikołaj Koszowski, Karol Grzegorczyk, Tsimur Hadeliya","date":"2021","paper_url":"https://aclanthology.org/2021.wmt-1.10/","accepted_at":"Proceedings of the Sixth Conference on Machine Translation","paper_title":"Allegro.eu Submission to WMT21 News Translation Task"},{"authors":"Robert Mroczkowski, Piotr Rybak, Alina Wróblewska, Ireneusz Gawlik","date":"2021","paper_url":"https://www.aclweb.org/anthology/2021.bsnlp-1.1/","accepted_at":"BSNLP, accepted long paper","paper_title":"HerBERT: Efficiently Pretrained Transformer-based Language Model for Polish"},{"authors":"Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik","date":"2020","paper_url":"https://www.aclweb.org/anthology/2020.acl-main.111/","accepted_at":"ACL 2020, accepted long paper","paper_title":"KLEJ: Comprehensive Benchmark for Polish Language Understanding"},{"authors":"Przemysław Pobrotyn, Tomasz Bartczak, Mikołaj Synowiec, Radosław Białobrzeski, Jarosław Bojar","date":"2020","paper_url":"https://arxiv.org/abs/2005.10084","accepted_at":"SIGIR eCommerce Workshop 2020, contributed talk","paper_title":"Context-Aware Learning to Rank with Self-Attention"},{"authors":"Przemysław Pobrotyn, Radosław Białobrzeski","date":"2020","paper_url":"https://arxiv.org/abs/2102.07831","accepted_at":"The 2021 SIGIR Workshop On eCommerce (SIGIR eCom ’21)","paper_title":"NeuralNDCG: Direct Optimisation of a Ranking Metric via Differentiable Relaxation of Sorting"},{"authors":"Janusz Tracz, Piotr Wójcik, Kalina Jasinska-Kobus, Riccardo Belluzzo, Robert Mroczkowski, Ireneusz Gawlik","date":"2020","paper_url":"https://www.aclweb.org/anthology/2020.ecomnlp-1.7/","accepted_at":"EComNLP 2020 COLING Workshop on Natural Language Processing in E-Commerce","paper_title":"BERT-based similarity learning for product 
matching"}],"videos":[{"title":"The structure of customer service data @Allegro","url":"https://www.youtube.com/watch?v=jK-NX_qufjI","who":"Aleksandra Chrabrowa","description":"GHOST Day: AMLC 2022","thumb":"images/video-headers/ola-ghost22.png"},{"title":"Retrieval at Scale","url":"https://www.youtube.com/watch?v=fP0e3nuUyRY","who":"Aleksandra Osowska-Kurczab & Jacek Szczerbiński","description":"Sponsored talk by Allegro for ML in PL Conference 2022","thumb":"images/video-headers/ola-jacek-mlinpl-2022.png"},{"title":"Use Your Data Wisely – Data-Centric NLP in the E-commerce Domain","url":"https://www.youtube.com/watch?v=zEgU0mIKiVA&t","who":"Paweł Olszewski","description":"Seminar at Warsaw.ai - Episode XV - 2.06.2022","thumb":"images/video-headers/pawel-warsawai.png"},{"title":"Evaluation of Transfer Learning for Polish with a Text-to-Text Model","url":"https://s3.eu-west-2.wasabisys.com/lrec2022/sessions/575.mp4","who":"Dariusz Kajtoch","description":"Paper presentation at LREC 2022","thumb":"images/video-headers/darek-lrec2022.png"}],"videos2":[{"title":"Introduction to Offline Reinforcement Learning and its applications","url":"https://www.youtube.com/watch?v=zF8TcTgcmRM","who":"Riccardo Belluzzo","description":"Seminar at the University of Padua","thumb":"images/video-headers/riccardo-padua.png"},{"title":"plT5: Universal Model For Polish Language","url":"https://www.youtube.com/watch?v=BJYL7QZD6z4","who":"Dariusz Kajtoch","description":"ML in PL 2021","thumb":"images/video-headers/darek-plt5.png"},{"title":"How to translate Allegro into foreign languages","url":"https://www.youtube.com/watch?v=6KDU5TZohpM&list=PLzveSKBX_3N7yPb4ErB5HJ83eB6XvH37C&index=27","who":"Karol Grzegorczyk","description":"ATM 2021 public track presentation","thumb":"images/video-headers/karol-atm.png"},{"title":"“Do you speak Allegro?” Large Scale Language Modeling","url":"https://www.youtube.com/watch?v=6T-R4kgIbBs&list=PLzveSKBX_3N7yPb4ErB5HJ83eB6XvH37C&index=19","who":"Riccardo Belluzzo","description":"ATM 2021 public track presentation","thumb":"images/video-headers/riccardo-atm.png"}],"open_source":[{"name":"Hugging Face Allegro","url":"https://huggingface.co/allegro","description":"We contribute to the NLP community by publishing models and datasets to the Hugging Face Hub!\n"},{"name":"allms","url":"https://github.com/allegro/allms","description":"Versatile and powerful library designed to streamline the process of querying large language models.\n* Simple and User-Friendly Interface,\n* Asynchronous Querying,\n* Automatic Retrying Mechanism,\n* Error Handling and Management,\n* Output Parsing"},{"name":"allRank","url":"https://github.com/allegro/allRank","description":"Framework for training neural Learning-to-Rank (LTR) models,\nfeaturing implementations of:\n* common pointwise, pairwise and listwise loss function,\n* fully connected and Transformer-like scoring function,\n* commonly used evaluation metrics like Normalized Discounted Cumulative Gain (NDCG) and Mean Reciprocal Rank (MRR},\n* click-models for experiments on simulated click-through data"},{"name":"KLEJ Benchmark","url":"https://klejbenchmark.com/","icon":"FaTint","description":"The KLEJ benchmark (Kompleksowa Lista Ewaluacji Językowych) is a set of nine evaluation tasks for the Polish language understanding. 
Key benchmark features:\n* It contains a diverse set of tasks from different domains and with different objectives,\n* Most tasks are created from existing datasets but we also release the new sentiment analysis dataset from an e-commerce domain."},{"name":"HerBERT","url":"https://huggingface.co/allegro/herbert-large-cased","description":"HerBERT is a BERT-based language model trained on six different corpora for Polish language understanding. It achieves state-of-the-art results on multiple downstream tasks, including [KLEJ Benchmark](https://klejbenchmark.com/) and Part-of-Speech tagging. We release both Base and Large variants of the model as a part of [transformers](https://github.com/huggingface/transformers) library for anyone to use."}],"teams":[{"name":"Machine Translation","icon":"FaRobot","description":"We are developing an in-house Machine Translation engine specifically for e-commerce purposes, aiming to provide better value compared to off-the-shelf solutions. Our focus is on accurately translating industry-specific terms and jargon, while also creating a scalable and cost-efficient solution. We employ state-of-the-art machine learning methods, involving human evaluators and automatic quality estimation models to continually enhance translation quality. Our goal is to make our platform accessible to non-Polish speakers globally and contribute to the machine translation community."},{"name":"Language Modeling","icon":"FaSitemap","description":"We employ state-of-the-art deep learning models and a range of NLP algorithms to solve diverse problems that require semantic understanding of the specialized language used within a unique environment of an e-commerce platform. We utilize and develop Large Language Models (LLMs), with the goal of providing the company with general purpose Foundation Models that can be tailored for specific downstream tasks. On a daily basis, we use our models in the following applications: Semantic Search, Question Answering, Conversational AI, Generative AI, Named Entity Recognition."},{"name":"Learning to Rank","icon":"FaList","description":"In Learning to Rank our goal is to develop machine learning models for search. Our main focus is on ranking solutions in all phases of the search pipeline, serving millions of searches a day. Currently our main area of expertise is neural text-based search and relevance. We’re also interested in topics such as reranking, feature interaction architectures, and personalization."},{"name":"Computer Vision","icon":"FaImages","description":"At MLR Computer Vision, our primary objective is to elevate the user experience by leveraging machine learning image processing algorithms. We specifically concentrate on image representation learning for Visual Search and the development of robust image classification models. Presently, our research is focused on the integration of multiple modalities into our models. This integration enables our models to process not only images but also harness diverse sources of information such as product titles, descriptions, and attributes. The implementation of these multimodal models holds significant potential in various domains, including semantic search and the enhancement of product catalog quality. 
By employing such models, we aim to deliver superior solutions in these areas, ultimately providing enhanced user experiences."},{"name":"Recommendations","icon":"FaShoppingBag","description":"Our team's primary objective is to fulfill users' needs by providing them with a diverse range of products that align with their interests. We strive to inspire users and connect them with relevant offers by leveraging recommender systems. To achieve this, we rely on the collective behaviors of our user-base, forming the foundation of our algorithms. However, we also incorporate content features of the items into our models, enriching recommendations with exploratory algorithms. These algorithms not only utilize historical data but also actively engage with the world, enabling us to explore new possibilities. Our major challenges revolve around developing innovative algorithms that can deliver high-quality recommendations while effectively handling Allegro's significant daily traffic. This ambitious endeavor requires us to operate at scale, ensuring seamless user experiences across the platform."},{"name":"ML Ops","icon":"FaSignal","description":"The MLOps team aims to optimize, scale, and deploy advanced machine learning models. We blend artificial intelligence, software engineering, and DevOps expertise to embrace the full potential of research engineers and data scientists from other teams. We orchestrate the entire machine learning lifecycle, from data preprocessing and annotation to model deployment, using the cutting-edge infrastructure of Google Cloud and Kubernetes. We're operating at a massive scale with several terabytes of data processed daily and thousands of predictions per second. "}]},"__N_SSG":true} \ No newline at end of file diff --git a/_next/static/OLpqX5JFw5NMxyQBdzhDl/_buildManifest.js b/_next/static/ahszDPSeH3NfSLZyM4Kj9/_buildManifest.js similarity index 100% rename from _next/static/OLpqX5JFw5NMxyQBdzhDl/_buildManifest.js rename to _next/static/ahszDPSeH3NfSLZyM4Kj9/_buildManifest.js diff --git a/_next/static/OLpqX5JFw5NMxyQBdzhDl/_ssgManifest.js b/_next/static/ahszDPSeH3NfSLZyM4Kj9/_ssgManifest.js similarity index 100% rename from _next/static/OLpqX5JFw5NMxyQBdzhDl/_ssgManifest.js rename to _next/static/ahszDPSeH3NfSLZyM4Kj9/_ssgManifest.js diff --git a/authors/index.html b/authors/index.html index 3039fbe..8657549 100644 --- a/authors/index.html +++ b/authors/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file diff --git a/blog/index.html b/blog/index.html index 5cd4409..1065e36 100644 --- a/blog/index.html +++ b/blog/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file diff --git a/events/index.html b/events/index.html index bddfe53..33117a9 100644 --- a/events/index.html +++ b/events/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file diff --git a/index.html b/index.html index a3d6aae..79c236d 100644 --- a/index.html +++ b/index.html @@ -1,15 +1,15 @@ -Allegro ML Research

About us

Machine Learning Research is Allegro’s R&D lab created to develop and apply state-of-the-art machine learning methods, helping Allegro grow and innovate with artificial intelligence. Beyond bringing AI to production, we are committed to advancing the understanding of machine learning through open collaboration with the scientific community.

Areas

Machine Translation

We are developing an in-house Machine Translation engine specifically for e-commerce purposes, aiming to provide better value compared to off-the-shelf solutions. Our focus is on accurately translating industry-specific terms and jargon, while also creating a scalable and cost-efficient solution. We employ state-of-the-art machine learning methods, involving human evaluators and automatic quality estimation models to continually enhance translation quality. Our goal is to make our platform accessible to non-Polish speakers globally and contribute to the machine translation community.

Language Modeling

We employ state-of-the-art deep learning models and a range of NLP algorithms to solve diverse problems that require semantic understanding of the specialized language used within the unique environment of an e-commerce platform. We utilize and develop Large Language Models (LLMs), with the goal of providing the company with general-purpose Foundation Models that can be tailored for specific downstream tasks. On a daily basis, we use our models in the following applications: Semantic Search, Question Answering, Conversational AI, Generative AI, Named Entity Recognition.

Learning to Rank

In Learning to Rank, our goal is to develop machine learning models for search. Our main focus is on ranking solutions in all phases of the search pipeline, serving millions of searches a day. Currently, our main area of expertise is neural text-based search and relevance. We’re also interested in topics such as reranking, feature interaction architectures, and personalization.

Computer Vision

At MLR Computer Vision, our primary objective is to elevate the user experience by leveraging machine learning image processing algorithms. We specifically concentrate on image representation learning for Visual Search and the development of robust image classification models. Presently, our research is focused on the integration of multiple modalities into our models. This integration enables our models to process not only images but also harness diverse sources of information such as product titles, descriptions, and attributes. The implementation of these multimodal models holds significant potential in various domains, including semantic search and the enhancement of product catalog quality. By employing such models, we aim to deliver superior solutions in these areas, ultimately providing enhanced user experiences.

Recommendations

Our team's primary objective is to fulfill users' needs by providing them with a diverse range of products that align with their interests. We strive to inspire users and connect them with relevant offers by leveraging recommender systems. To achieve this, we rely on the collective behaviors of our user base, forming the foundation of our algorithms. However, we also incorporate content features of the items into our models, enriching recommendations with exploratory algorithms. These algorithms not only utilize historical data but also actively engage with the world, enabling us to explore new possibilities. Our major challenges revolve around developing innovative algorithms that can deliver high-quality recommendations while effectively handling Allegro's significant daily traffic. This ambitious endeavor requires us to operate at scale, ensuring seamless user experiences across the platform.

ML Ops

The MLOps team aims to optimize, scale, and deploy advanced machine learning models. We blend artificial intelligence, software engineering, and DevOps expertise to embrace the full potential of research engineers and data scientists from other teams. We orchestrate the entire machine learning lifecycle, from data preprocessing and annotation to model deployment, using the cutting-edge infrastructure of Google Cloud and Kubernetes. We're operating at a massive scale with several terabytes of data processed daily and thousands of predictions per second.

Talks

Retrieval at Scale

Retrieval at Scale

Aleksandra Osowska-Kurczab & Jacek Szczerbiński

Sponsored talk by Allegro for ML in PL Conference 2022

Watch

Blog

Trust no one, not even your training data! Machine learning from noisy data

Trust no one, not even your training data! Machine learning from noisy data

Label noise is ever-present in machine learning practice. +Allegro ML Research

About us

Machine Learning Research is Allegro’s R&D lab created to develop and apply state-of-the-art machine learning methods, helping Allegro grow and innovate with artificial intelligence. Beyond bringing AI to production, we are committed to advancing the understanding of machine learning through open collaboration with the scientific community.

Areas

Machine Translation

We are developing an in-house Machine Translation engine specifically for e-commerce purposes, aiming to provide better value compared to off-the-shelf solutions. Our focus is on accurately translating industry-specific terms and jargon, while also creating a scalable and cost-efficient solution. We employ state-of-the-art machine learning methods, involving human evaluators and automatic quality estimation models to continually enhance translation quality. Our goal is to make our platform accessible to non-Polish speakers globally and contribute to the machine translation community.

Language Modeling

We employ state-of-the-art deep learning models and a range of NLP algorithms to solve diverse problems that require semantic understanding of the specialized language used within the unique environment of an e-commerce platform. We utilize and develop Large Language Models (LLMs), with the goal of providing the company with general-purpose Foundation Models that can be tailored for specific downstream tasks. On a daily basis, we use our models in the following applications: Semantic Search, Question Answering, Conversational AI, Generative AI, Named Entity Recognition.

Learning to Rank

In Learning to Rank, our goal is to develop machine learning models for search. Our main focus is on ranking solutions in all phases of the search pipeline, serving millions of searches a day. Currently, our main area of expertise is neural text-based search and relevance. We’re also interested in topics such as reranking, feature interaction architectures, and personalization.

Computer Vision

At MLR Computer Vision, our primary objective is to elevate the user experience by leveraging machine learning image processing algorithms. We specifically concentrate on image representation learning for Visual Search and the development of robust image classification models. Presently, our research is focused on the integration of multiple modalities into our models. This integration enables our models to process not only images but also harness diverse sources of information such as product titles, descriptions, and attributes. The implementation of these multimodal models holds significant potential in various domains, including semantic search and the enhancement of product catalog quality. By employing such models, we aim to deliver superior solutions in these areas, ultimately providing enhanced user experiences.

Recommendations

Our team's primary objective is to fulfill users' needs by providing them with a diverse range of products that align with their interests. We strive to inspire users and connect them with relevant offers by leveraging recommender systems. To achieve this, we rely on the collective behaviors of our user base, forming the foundation of our algorithms. However, we also incorporate content features of the items into our models, enriching recommendations with exploratory algorithms. These algorithms not only utilize historical data but also actively engage with the world, enabling us to explore new possibilities. Our major challenges revolve around developing innovative algorithms that can deliver high-quality recommendations while effectively handling Allegro's significant daily traffic. This ambitious endeavor requires us to operate at scale, ensuring seamless user experiences across the platform.

ML Ops

The MLOps team aims to optimize, scale, and deploy advanced machine learning models. We blend artificial intelligence, software engineering, and DevOps expertise to embrace the full potential of research engineers and data scientists from other teams. We orchestrate the entire machine learning lifecycle, from data preprocessing and annotation to model deployment, using the cutting-edge infrastructure of Google Cloud and Kubernetes. We're operating at a massive scale with several terabytes of data processed daily and thousands of predictions per second.

Talks

Retrieval at Scale

Retrieval at Scale

Aleksandra Osowska-Kurczab & Jacek Szczerbiński

Sponsored talk by Allegro for ML in PL Conference 2022

Watch

Blog

Trust no one, not even your training data! Machine learning from noisy data

Trust no one, not even your training data! Machine learning from noisy data

Label noise is ever-present in machine learning practice. Allegro datasets are no exception. We compared 7 methods for training classifiers robust to label noise. All of them improved…

+4
Alicja Rączkowska
Alicja Rączkowska…
0 Comments
read post
Turn-Based Offline Reinforcement Learning

Turn-Based Offline Reinforcement Learning

This blogpost is the result of a research collaboration between the Allegro Machine Learning Research team and -the Institute of Mathematics of the Polish Academy of…

+4
Riccardo Belluzzo
Riccardo Belluzzo…
0 Comments
read post

Open-Source

llm-wrapper

Versatile and powerful library designed to streamline the process of querying large language models.

+the Institute of Mathematics of the Polish Academy of…

+4
Riccardo Belluzzo
Riccardo Belluzzo…
0 Comments
read post

Open-Source

allms

Versatile and powerful library designed to streamline the process of querying large language models.

  • Simple and User-Friendly Interface,
  • Asynchronous Querying,
  • Automatic Retrying Mechanism,
  • Error Handling and Management,
  • Output Parsing
  • -
Try it!

allRank

Framework for training neural Learning-to-Rank (LTR) models, +

Try it!
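
The allms blurb above highlights asynchronous querying and an automatic retrying mechanism. As a rough illustration of those two patterns, here is a generic asyncio sketch (not the allms API; query_model is a hypothetical stand-in for a real LLM client call):

import asyncio
import random

async def query_model(prompt: str) -> str:
    # Hypothetical flaky backend call; replace with a real client.
    if random.random() < 0.3:
        raise ConnectionError("transient failure")
    return f"answer to: {prompt}"

async def query_with_retry(prompt: str, retries: int = 3, backoff: float = 0.5) -> str:
    # Automatic retrying with exponential backoff on transient errors.
    for attempt in range(retries):
        try:
            return await query_model(prompt)
        except ConnectionError:
            if attempt == retries - 1:
                raise
            await asyncio.sleep(backoff * 2 ** attempt)

async def main():
    # Asynchronous querying: fan out several prompts concurrently.
    prompts = ["first prompt", "second prompt"]
    print(await asyncio.gather(*(query_with_retry(p) for p in prompts)))

asyncio.run(main())

Exponential backoff bounds the load that transient failures place on the backend while keeping total wait time predictable.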

allRank

Framework for training neural Learning-to-Rank (LTR) models, featuring implementations of:

  • common pointwise, pairwise and listwise loss functions,
  • @@ -20,4 +20,4 @@
    • It contains a diverse set of tasks from different domains and with different objectives,
    • Most tasks are created from existing datasets but we also release the new sentiment analysis dataset from an e-commerce domain.
    • -
Try it!

HerBERT

HerBERT is a BERT-based language model trained on six different corpora for Polish language understanding. It achieves state-of-the-art results on multiple downstream tasks, including KLEJ Benchmark and Part-of-Speech tagging. We release both Base and Large variants of the model as part of the transformers library for anyone to use.

Try it!

Publications

2023

Going beyond research datasets: Novel intent discovery in the industry setting

Authors: Aleksandra Chrabrowa, Tsimur Hadeliya, Dariusz Kajtoch, Robert Mroczkowski, Piotr Rybak

Accepted at: Findings of the Association for Computational Linguistics: EACL 2023

Read

2022

Evaluation of Transfer Learning for Polish with a Text-to-Text Model

Authors: Aleksandra Chrabrowa, Łukasz Dragan, Karol Grzegorczyk, Dariusz Kajtoch, Mikołaj Koszowski, Robert Mroczkowski, Piotr Rybak

Accepted at: Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022)

Read

2021

Allegro.eu Submission to WMT21 News Translation Task

Authors: Mikołaj Koszowski, Karol Grzegorczyk, Tsimur Hadeliya

Accepted at: Proceedings of the Sixth Conference on Machine Translation

Read

2021

HerBERT: Efficiently Pretrained Transformer-based Language Model for Polish

Authors: Robert Mroczkowski, Piotr Rybak, Alina Wróblewska, Ireneusz Gawlik

Accepted at: BSNLP, accepted long paper

Read

2020

KLEJ: Comprehensive Benchmark for Polish Language Understanding

Authors: Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik

Accepted at: ACL 2020, accepted long paper

Read

2020

Context-Aware Learning to Rank with Self-Attention

Authors: Przemysław Pobrotyn, Tomasz Bartczak, Mikołaj Synowiec, Radosław Białobrzeski, Jarosław Bojar

Accepted at: SIGIR eCommerce Workshop 2020, contributed talk

Read

2020

NeuralNDCG: Direct Optimisation of a Ranking Metric via Differentiable Relaxation of Sorting

Authors: Przemysław Pobrotyn, Radosław Białobrzeski

Accepted at: The 2021 SIGIR Workshop On eCommerce (SIGIR eCom ’21)

Read

2020

BERT-based similarity learning for product matching

Authors: Janusz Tracz, Piotr Wójcik, Kalina Jasinska-Kobus, Riccardo Belluzzo, Robert Mroczkowski, Ireneusz Gawlik

Accepted at: EComNLP 2020 COLING Workshop on Natural Language Processing in E-Commerce

Read

Job offers

Mid/Senior Software Engineer (Machine Learning)

Warsaw, Poznań

Apply

Senior Data Analyst - Allegro Pay (Machine Learning and Analytics)

Warsaw

Apply
See more job offers

Proudly built by Allegro Tech engineers

  • Github
  • Facebook
  • Twitter
\ No newline at end of file +
Try it!
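
For reference, Normalized Discounted Cumulative Gain, one of the evaluation metrics listed in the allRank description, can be computed in a few lines (a minimal sketch using the common 2^rel − 1 gain; allRank's own implementation may differ in detail):

import math

def dcg(relevances):
    # Discounted Cumulative Gain: gain (2^rel - 1), discounted by log2 of rank.
    return sum((2 ** rel - 1) / math.log2(i + 2) for i, rel in enumerate(relevances))

def ndcg(relevances, k=None):
    # Normalize by the DCG of the ideal (descending-relevance) ordering.
    k = k or len(relevances)
    ideal = dcg(sorted(relevances, reverse=True)[:k])
    return dcg(relevances[:k]) / ideal if ideal > 0 else 0.0

print(ndcg([3, 2, 0, 1], k=3))  # ≈ 0.95 for this near-ideal ranking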

HerBERT

HerBERT is a BERT-based language model trained on six different corpora for Polish language understanding. It achieves state-of-the-art results on multiple downstream tasks, including KLEJ Benchmark and Part-of-Speech tagging. We release both Base and Large variants of the model as part of the transformers library for anyone to use.

Try it!
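
Loading the published checkpoint follows the standard Hugging Face transformers pattern (a minimal sketch; the model ID comes from the page linked above, and PyTorch is assumed to be installed):

from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-large-cased")
model = AutoModel.from_pretrained("allegro/herbert-large-cased")

# Encode a Polish sentence and take its contextual token embeddings.
inputs = tokenizer("Allegro to platforma e-commerce.", return_tensors="pt")
outputs = model(**inputs)
embeddings = outputs.last_hidden_state  # shape: (1, seq_len, hidden_size)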

Publications

2023

Going beyond research datasets: Novel intent discovery in the industry setting

Authors: Aleksandra Chrabrowa, Tsimur Hadeliya, Dariusz Kajtoch, Robert Mroczkowski, Piotr Rybak

Accepted at: Findings of the Association for Computational Linguistics: EACL 2023

Read

2022

Evaluation of Transfer Learning for Polish with a Text-to-Text Model

Authors: Aleksandra Chrabrowa, Łukasz Dragan, Karol Grzegorczyk, Dariusz Kajtoch, Mikołaj Koszowski, Robert Mroczkowski, Piotr Rybak

Accepted at: Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022)

Read

2021

Allegro.eu Submission to WMT21 News Translation Task

Authors: Mikołaj Koszowski, Karol Grzegorczyk, Tsimur Hadeliya

Accepted at: Proceedings of the Sixth Conference on Machine Translation

Read

2021

HerBERT: Efficiently Pretrained Transformer-based Language Model for Polish

Authors: Robert Mroczkowski, Piotr Rybak, Alina Wróblewska, Ireneusz Gawlik

Accepted at: BSNLP, accepted long paper

Read

2020

KLEJ: Comprehensive Benchmark for Polish Language Understanding

Authors: Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik

Accepted at: ACL 2020, accepted long paper

Read

2020

Context-Aware Learning to Rank with Self-Attention

Authors: Przemysław Pobrotyn, Tomasz Bartczak, Mikołaj Synowiec, Radosław Białobrzeski, Jarosław Bojar

Accepted at: SIGIR eCommerce Workshop 2020, contributed talk

Read

2020

NeuralNDCG: Direct Optimisation of a Ranking Metric via Differentiable Relaxation of Sorting

Authors: Przemysław Pobrotyn, Radosław Białobrzeski

Accepted at: The 2021 SIGIR Workshop On eCommerce (SIGIR eCom ’21)

Read

2020

BERT-based similarity learning for product matching

Authors: Janusz Tracz, Piotr Wójcik, Kalina Jasinska-Kobus, Riccardo Belluzzo, Robert Mroczkowski, Ireneusz Gawlik

Accepted at: EComNLP 2020 COLING Workshop on Natural Language Processing in E-Commerce

Read

Job offers

Mid/Senior Software Engineer (Machine Learning)

Warsaw, Poznań

Apply

Senior Data Analyst - Allegro Pay (Machine Learning and Analytics)

Warsaw

Apply
See more job offers
\ No newline at end of file diff --git a/jobs/index.html b/jobs/index.html index 607d932..5f6a909 100644 --- a/jobs/index.html +++ b/jobs/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file diff --git a/open-source/index.html b/open-source/index.html index c09e9aa..061ae73 100644 --- a/open-source/index.html +++ b/open-source/index.html @@ -1 +1 @@ -
Best regards
\ No newline at end of file +
Best regards
\ No newline at end of file diff --git a/podcast/[slug]/index.html b/podcast/[slug]/index.html index 5a7d46c..a0f85b9 100644 --- a/podcast/[slug]/index.html +++ b/podcast/[slug]/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file diff --git a/podcast/index.html b/podcast/index.html index fe90cec..dde70f4 100644 --- a/podcast/index.html +++ b/podcast/index.html @@ -1 +1 @@ -Redirecting...
\ No newline at end of file +Redirecting...
\ No newline at end of file